Hausaufgaben

Kontraste zwischen Grundschule und Gymnasium

Jürgen Schneider https://uni-tuebingen.de/de/28915 (Universität Tübingen), Britta Kohler
2020-03-23

Table of Contents



knitr::opts_chunk$set(echo = T, message = F, warning = F)
library(rio)
library(tidyverse)
library(lubridate)
library(ggstance)
library(mice)
library(miceadds)
library(naniar)
library(psych)
library(ggrepel)
library(BayesFactor)
library(bindata)
library(pander)
library(corrgram)
library(hrbrthemes)
library(MASS)
library(bain)

R.Version()

$platform
[1] "x86_64-w64-mingw32"

$arch
[1] "x86_64"

$os
[1] "mingw32"

$system
[1] "x86_64, mingw32"

$status
[1] ""

$major
[1] "3"

$minor
[1] "6.2"

$year
[1] "2019"

$month
[1] "12"

$day
[1] "12"

$`svn rev`
[1] "77560"

$language
[1] "R"

$version.string
[1] "R version 3.6.2 (2019-12-12)"

$nickname
[1] "Dark and Stormy Night"

Datenvorbereitung


# import raw observation sheets (one row per observed lesson)
gym <- rio::import("data/gym.xls")  # Gymnasium (secondary school) sample
gs <- rio::import("data/gs.xls")    # Grundschule (primary school) sample

Grundschule


# clean the Grundschule sheet: drop the worked example row, build a numeric id
gs <- gs %>%
  dplyr::filter(`Laufende\nNummer` != "Beispiel") %>%
  mutate(id = as.numeric(`Laufende\nNummer`))

## Correction of data based on the deviations found
 # miscalculations of `Min Beginn seit Std-Anfang` (id == 189, 248, 329) are skipped
 # because the self-computed variable `beg_hw_min` is used instead
 # due to the different time zone, one additional hour has to be added
gs[which(gs$id == 105), "Uhrzeit Beginn HA Vergabe"] <- "1899-12-31 11:48:00"    # typo, re-checked in questionnaire (FB)
gs[which(gs$id == 105), "Uhrzeit Ende Vergabe"] <- "1899-12-31 11:51:00"         # typo, re-checked in questionnaire (FB)
gs[which(gs$id == 120), "Uhrzeit Beginn HA Vergabe"] <- "1899-12-31 11:58:00"    # typo, re-checked in questionnaire (FB)
gs[which(gs$id == 168), "Uhrzeit Beginn HA Vergabe"] <- "1899-12-31 11:43:00"    # typo, re-checked in questionnaire (FB)
gs[which(gs$id == 250), "Uhrzeit Ende Vergabe"] <- "1899-12-31 12:33:00"         # typo, re-checked in questionnaire (FB)
gs[which(gs$id == 187), "Uhrzeit Beginn der Std laut Plan"] <- "1899-12-31 09:20:00" # typo, re-checked in questionnaire (FB)
gs[which(gs$id == 187), "Uhrzeit Ende der Std laut Plan"] <- "1899-12-31 10:50:00" # typo, re-checked in questionnaire (FB)
gs[which(gs$id == 187), "Uhrzeit Beginn HA Vergabe"] <- "1899-12-31 10:24:00"    # typo, re-checked in questionnaire (FB)
gs[which(gs$id == 187), "Uhrzeit Ende Vergabe"] <- "1899-12-31 10:40:00"         # typo, re-checked in questionnaire (FB)
gs[which(gs$id == 307), "Uhrzeit Ende Vergabe"] <- "1899-12-31 10:16:00"         # typo, re-checked in questionnaire (FB)
gs[which(gs$id == 315), "Uhrzeit Ende Vergabe"] <- "1899-12-31 11:55:00"         # typo, re-checked in questionnaire (FB)
gs[which(gs$id == 220), "Ankündigung"] <- 1 # typo
gs[which(gs$id == 198), "L schreibt"] <- 1 # typo 
gs[which(gs$id == 330), "L will Notation"] <- NA # value "9" does not exist; no plausible replacement reconstructable


# new var: parse the Excel datetime strings to clock times (lubridate::hm)
# and derive lesson length and homework timing in minutes
gs <- gs %>%
  mutate(beg_plan = hm(format(strptime(`Uhrzeit Beginn der Std laut Plan`,"%Y-%m-%d %H:%M:%S"),'%H:%M')),  # planned lesson start
         end_plan = hm(format(strptime(`Uhrzeit Ende der Std laut Plan`,"%Y-%m-%d %H:%M:%S"),'%H:%M')),    # planned lesson end
         beg_act  = hm(format(strptime(`Uhrzeit Realer Beginn der Std`,"%Y-%m-%d %H:%M:%S"),'%H:%M')),     # actual lesson start
         beg_hw   = hm(format(strptime(`Uhrzeit Beginn HA Vergabe`,"%Y-%m-%d %H:%M:%S"),'%H:%M')),         # homework assignment start
         end_hw   = hm(format(strptime(`Uhrzeit Ende Vergabe`,"%Y-%m-%d %H:%M:%S"),'%H:%M')),              # homework assignment end
         stunde   = lubridate::hour(end_plan - beg_plan)*60 + lubridate::minute(end_plan - beg_plan),     # planned length of the lesson (min)
         beg_hw_min = lubridate::hour(beg_hw - beg_plan)*60 + lubridate::minute(beg_hw - beg_plan),       # homework start relative to PLANNED lesson start (min)
         dau_hw_min = lubridate::hour(end_hw - beg_hw)*60 + lubridate::minute(end_hw - beg_hw),           # duration of the homework assignment (min)
         lh_ank   = `Ankündigung`,        # teacher behavior: announcement
         lh_auf   = `L verlangt Aufmerk`, # teacher demands attention
         lh_nen   = `L nennt`,            # teacher states the task
         lh_sch   = `L schreibt`,         # teacher writes the task down
         lh_erl   = `L erläutert`,        # teacher explains
         lh_wfr   = `L will Fragen`,      # teacher asks for questions
         lh_bfr   = `L beantwortet`,      # teacher answers questions
         lh_wno   = `L will Notation`,    # teacher requests notation
         lh_ano   = `L achtet Notat`,     # teacher checks notation
         sh_auf = `S aufmerksam`,         # student behavior: attentive
         sh_mel = `S melden`,             # students raise hands
         sh_fra = `S fragen`,             # students ask questions
         sh_not = `S notieren`)           # students note the task down

Gymnasium


# build a numeric id from the running number
gym <- gym %>%
  mutate(id = as.numeric(`Laufende\nNummer`))

# Correct a recurring coding typo: for these observations `L schreibt`
# should be 1; re-checked against the original questionnaires (FB).
# (%in% replaces the former 11-way `==`/`|` chain; selection is identical.)
ls_typo_ids <- c(141, 148, 149, 150, 164, 175, 180, 181, 182, 188, 189)
gym[which(gym$id %in% ls_typo_ids), "L schreibt"] <- 1


# new var: rename/derive Gymnasium variables so they line up with the
# Grundschule data set and can be stacked later
gym <- gym %>%
  mutate(beg_hw_min = `Min Beginn`,        # homework start, minutes since lesson start
         dau_hw_min = `HA stellen Dauer`,  # duration of the homework assignment (min)
         stunde     = case_when(Doppelstundenmodell == 0 ~ 45,
                                Doppelstundenmodell == 1 ~ 90),  # single vs. double lesson
         Schulart = "Gymnasium",
         lh_ank   = `Ankündigung`,        # teacher behavior: announcement
         lh_auf   = `L sorg Aufmerk`,     # teacher ensures attention
         lh_nen   = `L nennt`,            # teacher states the task
         lh_sch   = `L schreibt`,         # teacher writes the task down
         lh_erl   = `Erläuterung gesamt`, # teacher explains (total)
         lh_wfr   = `L will Fragen`,      # teacher asks for questions
         lh_bfr   = `L beantwortet`,      # teacher answers questions
         lh_wno   = `L will Notation`,    # teacher requests notation
         lh_ano   = `L achtet Notat`,     # teacher checks notation
         sh_auf = `S aufmerksam`,         # student behavior: attentive
         sh_mel = `S melden`,             # students raise hands
         sh_fra = `Fragen gesamt`,        # student questions (total)
         sh_not = `S notieren`)           # students note the task down

Erstellung gemeinsamer Datensatz


# Columns used in all downstream analyses — identical for both school
# types, so define the selection once instead of duplicating it.
analysis_vars <- c("beg_hw_min", "dau_hw_min", "stunde", "Schulart", "Klassenstufe",
                   "lh_ank", "lh_auf", "lh_nen", "lh_sch", "lh_erl", "lh_wfr",
                   "lh_bfr", "lh_wno", "lh_ano", "sh_auf", "sh_mel", "sh_fra", "sh_not")

gs_p <- gs %>%
  dplyr::select(all_of(analysis_vars))

gym_p <- gym %>%
  dplyr::select(all_of(analysis_vars))

# stack both school types into one analysis data set
p_data <- rbind(gs_p, gym_p)

# filter away the 10 cross grade classes as they are difficult to analyze 
# (combine characteristics of two grades in one)
p_data <- p_data %>%
  dplyr::filter(Klassenstufe != "1_2" | is.na(Klassenstufe)) %>%
  mutate(Klassenstufe = as.numeric(Klassenstufe))

Imputation der fehlenden Daten


### SCALE LEVELS (basis for the imputation methods chosen below) ###
  # lh_ank: dichotomous [0,1]
  # lh_auf: dichotomous [0,1]
  # lh_nen: dichotomous [0,1]
  # lh_sch: dichotomous [0,1]
  # lh_erl: dichotomous [0,1]
  # lh_wfr: dichotomous [0,1]
  # lh_bfr: metric, ratio scale, skewed
  # lh_wno: dichotomous [0,1]
  # lh_ano: dichotomous [0,1]
  # sh_auf: ordinal [1:5]
  # sh_mel: metric, ratio scale, skewed
  # sh_fra: metric, ratio scale, skewed
  # sh_not: metric, interval scale, bimodal

# visualize the amount and location of missing data (naniar)
vis_miss(p_data)


# upset plot: which combinations of variables are missing together
gg_miss_upset(p_data)

Choices

imputation model

# Schulart must be a factor so mice treats it categorically;
# Klassenstufe is handled as metric
p_data$Schulart <- as.factor(p_data$Schulart)
p_data$Klassenstufe <- as.numeric(p_data$Klassenstufe)

# documentation table: assumed scale type and mice method per variable
# (order matches the column order of p_data)
model_tab <- data.frame(variable = c("beg_hw_min", "dau_hw_min", "stunde", "Schulart", "Klassenstufe", "lh_ank", "lh_auf", "lh_nen", "lh_sch", 
                                     "lh_erl", "lh_wfr", "lh_bfr", "lh_wno", "lh_ano", "sh_auf", "sh_mel", "sh_fra", "sh_not"),
                        'scale type' = c("metric", "metric", "metric", "nominal", "metric", "binary", "binary", "binary", "binary", 
                                         "binary", "binary", "metric", "binary", "binary", "metric", "metric", "metric", "metric"),
                        method = c("pmm", "pmm", "pmm", "polyreg", "pmm", "logreg", "logreg", "logreg", "logreg", 
                                   "logreg", "logreg", "pmm", "logreg", "logreg", "pmm", "pmm", "pmm", "pmm"))

pander(model_tab)
variable scale.type method
beg_hw_min metric pmm
dau_hw_min metric pmm
stunde metric pmm
Schulart nominal polyreg
Klassenstufe metric pmm
lh_ank binary logreg
lh_auf binary logreg
lh_nen binary logreg
lh_sch binary logreg
lh_erl binary logreg
lh_wfr binary logreg
lh_bfr metric pmm
lh_wno binary logreg
lh_ano binary logreg
sh_auf metric pmm
sh_mel metric pmm
sh_fra metric pmm
sh_not metric pmm

# Defining methods
meth <- c("pmm", "pmm", "pmm", "polyreg", "pmm", "logreg", "logreg", "logreg", "logreg", 
          "logreg", "logreg", "pmm", "logreg", "logreg", "pmm", "pmm", "pmm", "pmm")

number of imputations
1000


m <- 1000
selection of predictors

# cor(y = p_data[,-4], x = !is.na(p_data[,-4]), use = "pair")
# inspect pairwise correlations to inform the predictor matrix
corrgram(p_data, lower.panel = "panel.pie", upper.panel = "panel.cor")


# set predictor specifications
# dry run (maxit = 0): no imputation yet, only obtain the default predictorMatrix
ini <- mice(p_data, 
            maxit = 0, 
            m = m,
            meth = meth,
            seed = 666
            )

pred <- ini$predictorMatrix
# drop redundant predictors (column = variable used as predictor for the others)
pred[,"stunde"] <- 0 # stunde highly correlated with beg_hw_min
pred[,"lh_bfr"] <- 0 # lh_bfr highly correlated with sh_fra

variables that are function of other variables
None.

which variables to impute
All.

number of iterations

20


maxit <- 20

imputation


# main imputation run on the full data set with the choices made above
imp <- mice(p_data, 
            maxit = maxit, 
            m = m,
            meth = meth,
            pred = pred,
            seed = 666,
            printFlag = F
            )

check imputation

plausible values

## will produce one strip plot of observed vs. imputed values per variable,
## across all m (= 1000) imputations
## uncomment if interested, otherwise I spare the space
# lattice::stripplot(imp, pch = 20, cex = 1.2)

All values seem plausible.

check convergence

plot(imp)

Bayes Factor Design Analysis für Kreuztabellen

Bayes Factor Design Analysis zur Bestimmung des BF Thresholds.

Effektstärke

Aufgrund fehlender Referenzwerte nehmen wir eine Effektstärke von \(\varphi=.2\) an. Diese liegt zwischen einem kleinen (\(\varphi=.1\)) und mittleren (\(\varphi=.3\)) Effekt nach Cohen (1988).


true_hyp <- c("H0 is true", "H1 is true") # two possible ground truths
phi <- c(0, .2)    # assumed effect sizes (phi) under H0 and H1 respectively

Anzahl Simulationen

Es werden 1000 Simulationen durchgeführt. Aufgrund der Robustheit der Ergebnisse in diesem Bereich verzichteten wir auf eine größere Anzahl an Simulationen.


n_sim <- 1000 # number of simulations

Ergebnisse Simulation


# Pre-allocate the results container: 2 * n_sim rows (n_sim per hypothesis).
# Growing a data.frame row-by-row inside the loop is O(n^2); the filled
# values are identical either way.
sim_cor_results <- data.frame(BayesFactor = numeric(2 * n_sim),
                              trueH = character(2 * n_sim),
                              stringsAsFactors = FALSE)

for (j in true_hyp) {                                # loop over both hypotheses
  # phi under the currently assumed ground truth (0 for H0, .2 for H1)
  rho <- if (j == "H0 is true") phi[1] else phi[2]
  bincorr <- matrix(c(1, rho, rho, 1), ncol = 2)     # binary correlation matrix
  offset <- if (j == "H0 is true") 0 else n_sim      # row offset into results

  for (n in seq_len(n_sim)) {
    # simulate 510 paired binary observations with the assumed correlation
    sim_df <- rmvbin(n = 510,
                     margprob = c(0.5, 0.5),
                     bincorr = bincorr)

    # 2x2 contingency table and its Bayes factor (independent multinomial,
    # margins fixed by columns)
    sim_imp_m <- matrix(table(sim_df[,1], sim_df[,2]), 2, 2)
    sim_fit <- contingencyTableBF(sim_imp_m, sampleType="indepMulti",fixedMargin = "cols")
    sim_cor_results[n + offset, "BayesFactor"] <- extractBF(sim_fit)$bf
    sim_cor_results[n + offset, "trueH"] <- j
    rm(sim_imp_m, sim_fit)
  }
}

# Classify one simulated Bayes factor as correct / incorrect / inconclusive,
# given the true hypothesis and a symmetric threshold t (evidence region
# [1/t, t] counts as inconclusive). Replaces three near-identical case_when
# blocks (thresholds 3, 5, 10).
classify_bf <- function(bf, true_h, threshold) {
  case_when(
    bf >= threshold & true_h == "H0 is true" ~ "incorrect",
    bf < threshold & bf > (1 / threshold) & true_h == "H0 is true" ~ "inconclusive",
    bf <= (1 / threshold) & true_h == "H0 is true" ~ "correct",
    bf >= threshold & true_h == "H1 is true" ~ "correct",
    bf < threshold & bf > (1 / threshold) & true_h == "H1 is true" ~ "inconclusive",
    bf <= (1 / threshold) & true_h == "H1 is true" ~ "incorrect"
  )
}

# categorize if result is correct, incorrect or inconclusive
sim_cor_results <- sim_cor_results %>%
  mutate(BF3  = classify_bf(BayesFactor, trueH, 3),
         BF5  = classify_bf(BayesFactor, trueH, 5),
         BF10 = classify_bf(BayesFactor, trueH, 10))

# pivot into long data frame for plot
sim_cor_results_l <- pivot_longer(sim_cor_results, 
                                  cols = 3:5, 
                                  names_to = "BF Threshold",
                                  values_to = "decision") 
# order factor for plot
sim_cor_results_l$`BF Threshold` <- factor(sim_cor_results_l$`BF Threshold`, levels = c("BF3", "BF5", "BF10"))


# hrbrthemes::import_roboto_condensed()

# stacked proportion bars: decision (correct/incorrect/inconclusive) per
# true hypothesis, one facet per BF threshold; labels are rounded percentages
ggplot(sim_cor_results_l, aes(trueH, fill = decision)) +
    geom_bar(position = "fill") +
    geom_text(aes(label=round(..count../n_sim*100), y= ..count../n_sim),
    position =position_stack(vjust = 0.5), stat= "count",
    color = "white", size = 5) +
    coord_flip() +
    facet_wrap(~`BF Threshold`, ncol = 1) +
    labs(title = "Results of the Bayes Factor Design Analysis",
         subtitle = "For three different Bayes Factors",
         caption = paste("In % (rounded), based on", n_sim, "simulations")) +
    xlab("True Hypothesis") +
    scale_fill_viridis_d() +
    theme_ipsum_rc()

Die Ergebnisse zeigen, dass bei einem BF von 3 kaum falsch-positive Ergebnisse zustande kommen (~1%) und die Power jeweils zufriedenstellend bzw. sehr hoch ist. Es ist somit nicht nötig, einen höheren BF zu verwenden, um falsch-positive Ergebnisse zu vermeiden. Höhere BFs hätten zudem den Nachteil, dass vermehrt inkonklusive Ergebnisse auftreten. Wir verwenden bei der Auswertung der binären Zusammenhänge (Kreuztabellen) somit einen Threshold von \(BF=3\) bzw. \(BF=\frac{1}{3}\).

Bayes Factor Design Analysis für t-Tests

Bayes Factor Design Analysis zur Bestimmung des BF Thresholds.

Effektstärke

Aufgrund fehlender Referenzwerte nehmen wir eine Effektstärke von \(d=.35\) an. Diese liegt zwischen einem kleinen (\(d=.2\)) und mittleren (\(d=.5\)) Effekt nach Cohen (1988).


true_hyp <- c("H0 is true", "H1 is true") # two possible ground truths
true_d <- c(0, .35)    # assumed effect sizes (Cohen's d) under H0 and H1

Anzahl Simulationen

Es werden 1000 Simulationen durchgeführt. Aufgrund der Robustheit der Ergebnisse in diesem Bereich verzichteten wir auf eine größere Anzahl an Simulationen.


n_sim <- 1000 # number of simulations

Ergebnisse Simulation


# Pre-allocate results: 2 * n_sim rows (n_sim per hypothesis); avoids
# growing the data.frame row-by-row in the loop. Final values identical.
sim_ttest_results <- data.frame(BayesFactor = numeric(2 * n_sim),
                                trueH = character(2 * n_sim),
                                stringsAsFactors = FALSE)

for(j in true_hyp) {                                   # loop over both hypotheses
  d <- if (j == "H0 is true") true_d[1] else true_d[2] # effect size under j
  offset <- if (j == "H0 is true") 0 else n_sim        # row offset into results

  for (n in seq_len(n_sim)) {                          # loop over all simulations
    # two correlated normal variables: group means 0 and d, n fixed at 510
    sim_ttest_df <- data.frame(mvrnorm(n = 510,
                                       mu = c(0, d),
                                       Sigma = matrix(c( 1, .5,   # vcov matrix
                                                        .5,  1),
                                                        2, 2)))

    # pivot longer to insert it into a lm
    sim_ttest_df_l <- pivot_longer(sim_ttest_df, 1:2, names_to = "group", values_to = "dependentVar")

    ### LINEAR MODEL ############################################ #
    # compute the means of each group (cell-means coding, no intercept)
    sim_fit_ttest <- lm(dependentVar ~ group-1, 
                        data = sim_ttest_df_l)
    ### BAIN #################################################### #    
    # generating hypotheses: equality (H1) and directional (H2)
    hypotheses <- "groupX1 = groupX2; groupX1 < groupX2"   #H1 and H2 respectively

    bf_ttest <- bain(sim_fit_ttest, 
                     hypothesis = hypotheses
                     )

    sim_ttest_results[n + offset, "BayesFactor"] <- bf_ttest$BFmatrix[2,1]  # BF(H2,H1)
    sim_ttest_results[n + offset, "trueH"] <- j
    rm(bf_ttest, sim_fit_ttest, sim_ttest_df, sim_ttest_df_l)
  }
}


# Classify one simulated Bayes factor as correct / incorrect / inconclusive,
# given the true hypothesis and a symmetric threshold t (evidence region
# [1/t, t] counts as inconclusive). Replaces three near-identical case_when
# blocks (thresholds 3, 5, 10).
classify_bf <- function(bf, true_h, threshold) {
  case_when(
    bf >= threshold & true_h == "H0 is true" ~ "incorrect",
    bf < threshold & bf > (1 / threshold) & true_h == "H0 is true" ~ "inconclusive",
    bf <= (1 / threshold) & true_h == "H0 is true" ~ "correct",
    bf >= threshold & true_h == "H1 is true" ~ "correct",
    bf < threshold & bf > (1 / threshold) & true_h == "H1 is true" ~ "inconclusive",
    bf <= (1 / threshold) & true_h == "H1 is true" ~ "incorrect"
  )
}

# categorize if result is correct, incorrect or inconclusive
sim_ttest_results <- sim_ttest_results %>%
  mutate(BF3  = classify_bf(BayesFactor, trueH, 3),
         BF5  = classify_bf(BayesFactor, trueH, 5),
         BF10 = classify_bf(BayesFactor, trueH, 10))

# pivot into long data frame for plot
sim_ttest_results_l <- pivot_longer(sim_ttest_results, 
                                    cols = 3:5, 
                                    names_to = "BF Threshold",
                                    values_to = "decision") 
# order factor for plot
sim_ttest_results_l$`BF Threshold` <- factor(sim_ttest_results_l$`BF Threshold`, levels = c("BF3", "BF5", "BF10"))


# hrbrthemes::import_roboto_condensed()

# stacked proportion bars: decision (correct/incorrect/inconclusive) per
# true hypothesis, one facet per BF threshold; labels are rounded percentages
ggplot(sim_ttest_results_l, aes(trueH, fill = decision)) +
    geom_bar(position = "fill") +
    geom_text(aes(label=round(..count../n_sim*100), y= ..count../n_sim),
    position =position_stack(vjust = 0.5), stat= "count",
    color = "white", size = 5) +
    coord_flip() +
    facet_wrap(~`BF Threshold`, ncol = 1) +
    labs(title = "Results of the Bayes Factor Design Analysis",
         subtitle = "For three different Bayes Factors",
         caption = paste("In % (rounded), based on", n_sim, "simulations")) +
    xlab("True Hypothesis") +
    scale_fill_viridis_d() +
    theme_ipsum_rc()

Hier zeigen sich bei einem \(N=510\) und einem angenommenen Cohen’s \(d=.35\) ebenfalls (nahezu) keine falsch-positiven Ergebnisse. Somit wird auch hier ein Threshold von \(BF=3\) bzw. \(BF=\frac{1}{3}\) verwendet.

Zeitpunkt der HA-Vergabe

Deskriptive Daten


pander::panderOptions('digits', 3)
## 45 minute lessons ################ #
# keep only lessons with a planned length of 45 minutes
schulart45 <- p_data %>% 
  dplyr::filter(stunde == 45)

# descriptive statistics for these lessons, split by school type
descriptives45 <-  p_data %>%
    dplyr::filter(stunde == 45) %>%
    dplyr::select(beg_hw_min, dau_hw_min) %>%
    psych::describeBy(group = schulart45$Schulart)

pander::pander(descriptives45)

# mode of the homework start time, estimated as the peak of a density curve
dens_45gs <- schulart45 %>%      # filter only measured cases
  dplyr::filter(!is.na(beg_hw_min) & Schulart == "Grundschule") #filter for Grundschule
dens_45gs <- density(dens_45gs$beg_hw_min)   # get density curve

dens_45gy <- schulart45 %>%      # filter only measured cases
  dplyr::filter(!is.na(beg_hw_min) & Schulart == "Gymnasium") #filter for Gymnasium 
dens_45gy <- density(dens_45gy$beg_hw_min)   # get density curve
paste("Modus der Hausaufgabenvergabe in 45min Stunden Grundschule =", round(dens_45gs$x[which.max(dens_45gs$y)], 1))

[1] "Modus der Hausaufgabenvergabe in 45min Stunden Grundschule = 41.1"

paste("Modus der Hausaufgabenvergabe in 45min Stunden Gymnasium =", round(dens_45gy$x[which.max(dens_45gy$y)], 1))

[1] "Modus der Hausaufgabenvergabe in 45min Stunden Gymnasium = 42.9"

## 90 minute lessons ################ #
# keep only lessons with a planned length of 90 minutes
schulart90 <- p_data %>% 
  dplyr::filter(stunde == 90)

# descriptive statistics for these lessons, split by school type
descriptives90 <-  p_data %>%
    dplyr::filter(stunde == 90) %>%
    dplyr::select(beg_hw_min, dau_hw_min) %>%
    psych::describeBy(group = schulart90$Schulart)

pander::pander(descriptives90)

# mode of the homework start time, estimated as the peak of a density curve
dens_90gs <- schulart90 %>%      # filter only measured cases
  dplyr::filter(!is.na(beg_hw_min) & Schulart == "Grundschule") #filter for Grundschule
dens_90gs <- density(dens_90gs$beg_hw_min)   # get density curve

dens_90gy <- schulart90 %>%      # filter only measured cases
  dplyr::filter(!is.na(beg_hw_min) & Schulart == "Gymnasium") #filter for Gymnasium 
dens_90gy <- density(dens_90gy$beg_hw_min)   # get density curve
paste("Modus der Hausaufgabenvergabe in 90min Stunden Grundschule =", round(dens_90gs$x[which.max(dens_90gs$y)], 1))

[1] "Modus der Hausaufgabenvergabe in 90min Stunden Grundschule = 81.8"

paste("Modus der Hausaufgabenvergabe in 90min Stunden Gymnasium =", round(dens_90gy$x[which.max(dens_90gy$y)], 1))

[1] "Modus der Hausaufgabenvergabe in 90min Stunden Gymnasium = 87.2"

45min Stunden


#### IMPUTATION #### #
# separate imputation for the 45-minute lessons only, reusing the
# methods, predictor matrix, m and maxit chosen for the full data set
p_data_45 <- p_data %>%
    dplyr::filter(stunde==45)

imp45 <- mice(p_data_45, 
              maxit = maxit, 
              m = m,
              meth = meth,
              pred = pred,
              seed = 666,
              printFlag = F
              )

#### Compute BFs within informed hypotheses framework (bain) ##################################### #

### for bain: compute vcov by hand ###
# Pre-allocated containers (one row per imputation) for the variance of the
# mean (VOM) and the group mean estimates; avoids growing data.frames in the
# loop. Final contents are identical to row-wise growth.
vcov_df <- data.frame(Grundschule = numeric(m),
                      Gymnasium = numeric(m)
                      )

mean_df <- data.frame(Grundschule = numeric(m),
                      Gymnasium = numeric(m)
                      )

# sample size per group (constant across imputations)
samp_gr <- table(mice::complete(imp45)$Schulart)

## loop over all m imputations
for(i in seq_len(m)) {
  # cell-means model: one mean per school type, no intercept
  fit_lm <- lm(beg_hw_min ~ Schulart-1, data = mice::complete(imp45, i))
  
  var_lm <- summary(fit_lm)$sigma^2 # residual variance
  vcov_df[i, "Grundschule"] <- var_lm/samp_gr["Grundschule"] # variance of the mean per group
  vcov_df[i, "Gymnasium"] <- var_lm/samp_gr["Gymnasium"]     # variance of the mean per group
  
  # collect estimates of the means
  mean_df[i, "Grundschule"] <- coef(fit_lm)["SchulartGrundschule"]
  mean_df[i, "Gymnasium"] <- coef(fit_lm)["SchulartGymnasium"]
  
  rm(fit_lm, var_lm) # clean up, because we like it tidy in here
}


## make var matrices 
# pool: average the variances over the m imputations
vcov_df <- vcov_df %>%
        summarize_all(mean)

# create 1x1 variance matrices, one per group (the format bain expects)
mat1 <- matrix(vcov_df$Grundschule, 1, 1)
mat2 <- matrix(vcov_df$Gymnasium, 1, 1)

variances <- list(mat1, mat2)

## pool: average the mean estimates over the m imputations
bf_data <- mean_df %>%
        summarize_all(mean)
bf_data <- as.numeric(bf_data)
names(bf_data) <- c("Grund", "Gym")

## BAIN ##### #
# generating hypotheses
hypotheses <- "Gym = Grund; Gym < Grund; Gym > Grund"

bf_45 <- bain(bf_data, 
              hypothesis = hypotheses,
              n = samp_gr,           # size of the groups
              Sigma = variances,     # matrices of residual variances of groups
              group_parameters = 1,  # there is 1 group specific parameter (the mean in each group)
              joint_parameters = 0   # there are no parameters that apply to each of the groups (e.g. the regression coefficient of a covariate)
              )

print(bf_45)

Bayesian informative hypothesis testing for an object of class numeric:

   Fit_eq Com_eq Fit_in Com_in Fit   Com   BF         PMPa  PMPb 
H1 0.000  0.017  1.000  1.000  0.000 0.017 0.001      0.000 0.000
H2 1.000  1.000  0.000  0.500  0.000 0.500 0.000      0.000 0.000
H3 1.000  1.000  1.000  0.500  1.000 0.500 333236.770 1.000 0.667
Hu                                                          0.333

Hypotheses:
  H1: Gym=Grund
  H2: Gym<Grund
  H3: Gym>Grund

Note: BF denotes the Bayes factor of the hypothesis at hand versus its complement.

print(bf_45$BFmatrix)

             H1          H2           H3
H1 1.000000e+00    115.0622 3.452867e-04
H2 8.690949e-03      1.0000 3.000869e-06
H3 2.896144e+03 333236.7735 1.000000e+00

###### DESCRIPTIVE PLOT ############################################### #

## Awesome Rainvloud plots, check out source:
# Allen M, Poggiali D, Whitaker K et al. Raincloud plots: a multi-platform tool for 
#           robust data visualization [version 1; peer review: 2 approved]. 
#           Wellcome Open Res 2019, 4:63. DOI: 10.12688/wellcomeopenres.15191.1
source("R_rainclouds.R")

# raincloud plot of homework start times in 45-minute lessons, by school
# type; dashed line marks the planned lesson end (45 min)
ggplot(p_data%>%dplyr::filter(stunde == 45), aes(x="", y = beg_hw_min, fill = Schulart, colour = Schulart)) +
                geom_flat_violin(position = position_nudge(x = 0, y = 0), adjust = 1.6, trim = FALSE, alpha = .3) +
                geom_boxplot(aes(x=""), position = position_nudge(x = c(.49, .54), y = 0), 
                             outlier.shape = NA, alpha = .5, width = .04, colour = "black") +
                geom_hline(yintercept = 45, linetype = "dashed", colour = "#696f71", size = 1) +
                scale_colour_brewer(palette = "Set1")+
                scale_fill_brewer(palette = "Set1") +
                scale_y_continuous(expand = c(0, 0), breaks = c(0, 10,20,30,40,45,50,60), limits = c(0, 65)) +
                scale_x_discrete(expand = c(0, 0)) +
                ylab("Minuten seit geplantem Stundenbeginn") +
                xlab("% Häufigkeit") +
                theme_light() +
                theme(axis.line = element_line(colour = "#696f71"),
                      panel.grid.major = element_blank(),
                      panel.grid.minor = element_blank(),
                      panel.border = element_blank(),
                      panel.background = element_rect(fill = "#f6f7f7")) +
                coord_flip()

90min Stunden

Basierend auf untersuchten (nicht-fehlenden) Daten.


#### IMPUTATION #### #
# separate imputation for the 90-minute lessons only, reusing the
# methods, predictor matrix, m and maxit chosen for the full data set
p_data_90 <- p_data %>%
    dplyr::filter(stunde==90)

imp90 <- mice(p_data_90, 
              maxit = maxit, 
              m = m,
              meth = meth,
              pred = pred,
              seed = 666,
              printFlag = F
              )

#### Compute BFs within informed hypotheses framework (bain) ##################################### #

### for bain: compute vcov by hand ###
# Pre-allocated containers (one row per imputation) for the variance of the
# mean (VOM) and the group mean estimates; avoids growing data.frames in the
# loop. Final contents are identical to row-wise growth.
vcov_df <- data.frame(Grundschule = numeric(m),
                      Gymnasium = numeric(m)
                      )

mean_df <- data.frame(Grundschule = numeric(m),
                      Gymnasium = numeric(m)
                      )

# sample size per group (constant across imputations)
samp_gr <- table(mice::complete(imp90)$Schulart)


## loop over all m imputations
for(i in seq_len(m)) {
  # cell-means model: one mean per school type, no intercept
  fit_lm <- lm(beg_hw_min ~ Schulart-1, data = mice::complete(imp90, i))
  
  var_lm <- summary(fit_lm)$sigma^2 # residual variance
  vcov_df[i, "Grundschule"] <- var_lm/samp_gr["Grundschule"] # variance of the mean per group
  vcov_df[i, "Gymnasium"] <- var_lm/samp_gr["Gymnasium"]     # variance of the mean per group
  
  # collect estimates of the means
  mean_df[i, "Grundschule"] <- coef(fit_lm)["SchulartGrundschule"]
  mean_df[i, "Gymnasium"] <- coef(fit_lm)["SchulartGymnasium"]
  
  rm(fit_lm, var_lm) # clean up, because we like it tidy in here
}

## make var matrices 
# pool: average the variances over the m imputations
vcov_df <- vcov_df %>%
        summarize_all(mean)

# create 1x1 variance matrices, one per group (the format bain expects)
mat1 <- matrix(vcov_df$Grundschule, 1, 1)
mat2 <- matrix(vcov_df$Gymnasium, 1, 1)

variances <- list(mat1, mat2)

## pool: average the mean estimates over the m imputations
bf_data <- mean_df %>%
        summarize_all(mean)
bf_data <- as.numeric(bf_data)
names(bf_data) <- c("Grund", "Gym")


## BAIN ##### #
# generating hypotheses
hypotheses <- "Gym = Grund; Gym < Grund; Gym > Grund"

bf_90 <- bain(bf_data, 
              hypothesis = hypotheses,
              n = samp_gr,           # size of the groups
              Sigma = variances,     # matrices of residual variances of groups
              group_parameters = 1,  # there is 1 group specific parameter (the mean in each group)
              joint_parameters = 0   # there are no parameters that apply to each of the groups (e.g. the regression coefficient of a covariate)
              )

print(bf_90)

Bayesian informative hypothesis testing for an object of class numeric:

   Fit_eq Com_eq Fit_in Com_in Fit   Com   BF     PMPa  PMPb 
H1 0.011  0.010  1.000  1.000  0.011 0.010 1.075  0.350 0.264
H2 1.000  1.000  0.023  0.500  0.023 0.500 0.023  0.015 0.011
H3 1.000  1.000  0.977  0.500  0.977 0.500 43.415 0.636 0.480
Hu                                                      0.245

Hypotheses:
  H1: Gym=Grund
  H2: Gym<Grund
  H3: Gym>Grund

Note: BF denotes the Bayes factor of the hypothesis at hand versus its complement.

print(bf_90$BFmatrix)

           H1       H2         H3
H1 1.00000000 23.87773 0.54998494
H2 0.04188002  1.00000 0.02303338
H3 1.81823161 43.41525 1.00000000

###### DESCRIPTIVE PLOT ############################################### #

# raincloud plot of homework start times in 90-minute lessons, by school
# type; dashed line marks the planned lesson end (90 min)
# (geom_flat_violin comes from R_rainclouds.R sourced earlier)
ggplot(p_data%>%dplyr::filter(stunde == 90), 
       aes(x="", y = beg_hw_min, fill = Schulart, colour = Schulart)) +
                geom_flat_violin(position = position_nudge(x = 0, y = 0), 
                                 adjust = 1.6, trim = FALSE, alpha = .3) +
                geom_boxplot(aes(x=""), position = position_nudge(x = c(.50, .56), y = 0), 
                             outlier.shape = NA, alpha = .5, width = .05, colour = "black") +
                geom_hline(yintercept = 90, linetype = "dashed", colour = "#696f71", size = 1) +
                scale_colour_brewer(palette = "Set1")+
                scale_fill_brewer(palette = "Set1") +
                scale_y_continuous(expand = c(0, 0), 
                                   breaks = c(0, 10,20,30,40,50,60,70,80,90), 
                                   limits = c(0,95)) +
                scale_x_discrete(expand = c(0, 0)) +
                ylab("Minuten seit geplantem Stundenbeginn") +
                xlab("% Häufigkeit") +
                theme_light() +
                theme(axis.line = element_line(colour = "#696f71"),
                      panel.grid.major = element_blank(),
                      panel.grid.minor = element_blank(),
                      panel.border = element_blank(),
                      panel.background = element_rect(fill = "#f6f7f7")) +
                coord_flip()

Zeitbedarf HA

45min Stunden


#### Compute BFs within informed hypotheses framework (bain) ##################################### #
# same procedure as for beg_hw_min, now for the DURATION of the homework
# assignment (dau_hw_min) in 45-minute lessons

### for bain: compute vcov by hand ###
# Pre-allocated containers (one row per imputation) for the variance of the
# mean (VOM) and the group mean estimates; avoids growing data.frames in the
# loop. Final contents are identical to row-wise growth.
vcov_df <- data.frame(Grundschule = numeric(m),
                      Gymnasium = numeric(m)
                      )

mean_df <- data.frame(Grundschule = numeric(m),
                      Gymnasium = numeric(m)
                      )

# sample size per group (constant across imputations)
samp_gr <- table(mice::complete(imp45)$Schulart)


## loop over all m imputations
for(i in seq_len(m)) {
  # cell-means model: one mean per school type, no intercept
  fit_lm <- lm(dau_hw_min ~ Schulart-1, data = mice::complete(imp45, i))
  
  var_lm <- summary(fit_lm)$sigma^2 # residual variance
  vcov_df[i, "Grundschule"] <- var_lm/samp_gr["Grundschule"] # variance of the mean per group
  vcov_df[i, "Gymnasium"] <- var_lm/samp_gr["Gymnasium"]     # variance of the mean per group
  
  # collect estimates of the means
  mean_df[i, "Grundschule"] <- coef(fit_lm)["SchulartGrundschule"]
  mean_df[i, "Gymnasium"] <- coef(fit_lm)["SchulartGymnasium"]
  
  rm(fit_lm, var_lm) # clean up, because we like it tidy in here
}

## make var matrices 
# pool: average the variances over the m imputations
vcov_df <- vcov_df %>%
        summarize_all(mean)

# create 1x1 variance matrices, one per group (the format bain expects)
mat1 <- matrix(vcov_df$Grundschule, 1, 1)
mat2 <- matrix(vcov_df$Gymnasium, 1, 1)

variances <- list(mat1, mat2)

## pool: average the mean estimates over the m imputations
bf_data <- mean_df %>%
        summarize_all(mean)
bf_data <- as.numeric(bf_data)
names(bf_data) <- c("Grund", "Gym")


## BAIN ##### #
# generating hypotheses
hypotheses <- "Gym = Grund; Gym < Grund; Gym > Grund"


bf_45 <- bain(bf_data, 
              hypothesis = hypotheses,
              n = samp_gr,           # size of the groups
              Sigma = variances,     # matrices of residual variances of groups
              group_parameters = 1,  # there is 1 group specific parameter (the mean in each group)
              joint_parameters = 0   # there are no parameters that apply to each of the groups (e.g. the regression coefficient of a covariate)
              )

print(bf_45)

Bayesian informative hypothesis testing for an object of class numeric:

   Fit_eq Com_eq Fit_in Com_in Fit   Com   BF           PMPa  PMPb 
H1 0.000  0.069  1.000  1.000  0.000 0.069 0.000        0.000 0.000
H2 1.000  1.000  1.000  0.500  1.000 0.500 19180728.204 1.000 0.667
H3 1.000  1.000  0.000  0.500  0.000 0.500 0.000        0.000 0.000
Hu                                                            0.333

Hypotheses:
  H1: Gym=Grund
  H2: Gym<Grund
  H3: Gym>Grund

Note: BF denotes the Bayes factor of the hypothesis at hand versus its complement.

print(bf_45$BFmatrix)

             H1           H2           H3
H1 1.000000e+00 6.970502e-06 1.336994e+02
H2 1.434617e+05 1.000000e+00 1.918075e+07
H3 7.479464e-03 5.213562e-08 1.000000e+00

###### DESCRIPTIVE PLOT ############################################### #
# Raincloud-style plot of homework duration (minutes) in 45-minute lessons:
# half violin, jittered raw values, and a boxplot, split by school type.
ggplot(dplyr::filter(p_data, stunde == 45),
       aes(x = Schulart, y = dau_hw_min, fill = Schulart)) +
  geom_flat_violin(position = position_nudge(x = .1, y = 0),
                   adjust = 1.6, trim = FALSE, alpha = .3, colour = NA) +
  geom_point(aes(color = Schulart), position = position_jitter(width = .05),
             size = 1, alpha = 0.5) +
  geom_boxplot(position = position_nudge(x = -.15, y = 0),
               outlier.shape = NA, alpha = .5, width = .1, colour = "black") +
  scale_fill_brewer(palette = "Set1") +
  scale_colour_brewer(palette = "Set1") +
  scale_y_continuous(expand = c(0, 0)) +
  ylab("Minuten") +
  theme_light() +
  theme(axis.line = element_line(colour = "#696f71"),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        panel.background = element_rect(fill = "#f6f7f7"))

90min Stunden


#### Compute BFs within informed hypotheses framework (bain) ##################################### #

### for bain: compute vcov by hand ###
# Preallocate one row per imputation instead of growing the data frames
# inside the loop (m = number of imputations, set earlier in the file).
vcov_df <- data.frame(Grundschule = numeric(m),
                      Gymnasium = numeric(m))

mean_df <- data.frame(Grundschule = numeric(m),
                      Gymnasium = numeric(m))

# sample size per group
samp_gr <- table(mice::complete(imp90)$Schulart)


## loop over all m imputations
for (i in seq_len(m)) {
  # cell-means model: one mean per Schulart, no intercept
  fit_lm <- lm(dau_hw_min ~ Schulart - 1, data = mice::complete(imp90, i))

  var_lm <- summary(fit_lm)$sigma^2  # residual variance (was `**`, now idiomatic `^`)
  vcov_df[i, "Grundschule"] <- var_lm / samp_gr["Grundschule"]  # variance of the mean (VOM) per group
  vcov_df[i, "Gymnasium"] <- var_lm / samp_gr["Gymnasium"]      # variance of the mean (VOM) per group

  # collect estimates of the group means
  mean_df[i, "Grundschule"] <- coef(fit_lm)["SchulartGrundschule"]
  mean_df[i, "Gymnasium"] <- coef(fit_lm)["SchulartGymnasium"]

  rm(fit_lm, var_lm)  # clean up, because we like it tidy in here
}

## make var matrices
# pool: average the variance of the mean over the m imputations
vcov_df <- vcov_df %>%
  summarize_all(mean)

# bain expects one (1x1) variance matrix per group
mat1 <- matrix(vcov_df$Grundschule, 1, 1)
mat2 <- matrix(vcov_df$Gymnasium, 1, 1)

variances <- list(mat1, mat2)

## pool the mean estimates over the m imputations
bf_data <- mean_df %>%
  summarize_all(mean)
bf_data <- as.numeric(bf_data)
names(bf_data) <- c("Grund", "Gym")

## BAIN ##### #
# H1: equal means; H2: Gymnasium lower; H3: Gymnasium higher
hypotheses <- "Gym = Grund; Gym < Grund; Gym > Grund"

bf_90 <- bain(bf_data,
              hypothesis = hypotheses,
              n = samp_gr,           # size of the groups
              Sigma = variances,     # matrices of residual variances of groups
              group_parameters = 1,  # there is 1 group specific parameter (the mean in each group)
              joint_parameters = 0   # there are no parameters that apply to each of the groups (e.g. the regression coefficient of a covariate)
              )

print(bf_90)

Bayesian informative hypothesis testing for an object of class numeric:

   Fit_eq Com_eq Fit_in Com_in Fit   Com   BF       PMPa  PMPb 
H1 0.001  0.039  1.000  1.000  0.001 0.039 0.025    0.012 0.008
H2 1.000  1.000  1.000  0.500  1.000 0.500 2896.832 0.987 0.661
H3 1.000  1.000  0.000  0.500  0.000 0.500 0.000    0.000 0.000
Hu                                                        0.331

Hypotheses:
  H1: Gym=Grund
  H2: Gym<Grund
  H3: Gym>Grund

Note: BF denotes the Bayes factor of the hypothesis at hand versus its complement.

print(bf_90$BFmatrix)

           H1           H2         H3
H1  1.0000000 0.0126602106   36.67451
H2 78.9876273 1.0000000000 2896.83229
H3  0.0272669 0.0003452047    1.00000

###### DESCRIPTIVE PLOT ############################################### #
# Raincloud-style plot of homework duration (minutes) in 90-minute lessons:
# half violin, jittered raw values, and a boxplot, split by school type.
ggplot(dplyr::filter(p_data, stunde == 90),
       aes(x = Schulart, y = dau_hw_min, fill = Schulart)) +
  geom_flat_violin(position = position_nudge(x = .1, y = 0),
                   adjust = 1.6, trim = FALSE, alpha = .3, colour = NA) +
  geom_point(aes(color = Schulart), position = position_jitter(width = .05),
             size = 1, alpha = 0.5) +
  geom_boxplot(position = position_nudge(x = -.15, y = 0),
               outlier.shape = NA, alpha = .5, width = .1, colour = "black") +
  scale_fill_brewer(palette = "Set1") +
  scale_colour_brewer(palette = "Set1") +
  scale_y_continuous(expand = c(0, 0)) +
  ylab("Minuten") +
  theme_light() +
  theme(axis.line = element_line(colour = "#696f71"),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        panel.background = element_rect(fill = "#f6f7f7"))

Häufigkeiten der Lehrpersonen-/SuS-Handlungen

Deskriptive Daten


# Descriptive statistics for all teacher (lh_*) and student (sh_*) action
# indicators, computed separately per school type.
action_vars <- p_data %>%
  dplyr::select(lh_ank, lh_auf, lh_nen, lh_sch,
                lh_erl, lh_wfr, lh_wno, lh_ano,
                lh_bfr, sh_auf, sh_mel, sh_fra, sh_not)
descriptives <- psych::describeBy(action_vars, group = p_data$Schulart)

panderOptions("digits", 3)
pander::pander(descriptives)

L kündigt HA an

Prädiktor Schulart


# Compute density of BFs
# Bayes factor of the association lh_ank x Schulart for each imputation.
# Result columns are preallocated instead of growing the data frame row-wise.
cor_results <- data.frame(`lh_ank-Schulart` = numeric(m),
                          prob_GSminusGY = numeric(m),
                          check.names = FALSE)

for (i in seq_len(m)) {
  imp_m <- matrix(table(mice::complete(imp, i)$lh_ank,     # create contingency table
                        mice::complete(imp, i)$Schulart), 2, 2)
  fit <- contingencyTableBF(imp_m, sampleType = "indepMulti",
                            fixedMargin = "cols")          # GS and GY as fixed
  cor_results[i, "lh_ank-Schulart"] <- extractBF(fit)$bf   # save BF in data frame

  # posterior draws of cell probabilities; NOTE(review): posterior() is
  # stochastic, so values vary slightly between runs without a fixed seed
  chains <- posterior(fit, iterations = 1000)
  showMerkm_givenGS <- chains[, "pi[2,1]"] / chains[, "pi[*,1]"]  # compute ratio for GS
  showMerkm_givenGY <- chains[, "pi[2,2]"] / chains[, "pi[*,2]"]  # compute ratio for GY
  # mean of the per-draw differences (redundant mcmc() coercion removed;
  # mean() of the raw draws yields the identical value)
  cor_results[i, "prob_GSminusGY"] <- mean(showMerkm_givenGS - showMerkm_givenGY)

  rm(imp_m, fit, chains)
}

# show summary of BFs
summary(cor_results[, "lh_ank-Schulart"])

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   1730    9621    9621    9294    9621   38702 

# Density plus jittered points of the m Bayes factors on a log10 axis;
# dashed red lines mark the conventional evidence bounds 1/3 and 3.
ggplot(cor_results, aes(x = `lh_ank-Schulart`)) +
  geom_density(fill = "#b4a069", color = NA, alpha = .3) +
  geom_jitter(aes(y = ""), height = 1, width = 0,
              alpha = .3, size = 2.5, fill = "#b4a069", color = "#b4a069") +
  geom_vline(xintercept = c(1 / 3, 3), color = "red",
             alpha = .4, size = 2, linetype = "dashed") +
  scale_x_continuous(expand = c(0, 0), trans = "log10") +
  scale_y_discrete(expand = c(0, 0)) +
  xlab("BF [log10]") +
  ylab("% Häufigkeit") +
  theme_light() +
  theme(axis.line = element_line(colour = "#696f71"),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        panel.background = element_rect(fill = "#f6f7f7"))


# summary over imputations of the posterior mean difference (GS minus GY)
# of the conditional ratios computed in the loop above
summary(cor_results[,"prob_GSminusGY"])

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
 0.1430  0.1520  0.1533  0.1532  0.1542  0.1639 

Prädiktor Klassenstufe


# Compute density of BFs
# Bayes factor of the correlation lh_ank x Klassenstufe for each imputation;
# the result column is preallocated instead of grown inside the loop.
kl_results <- data.frame(`lh_ank-Klassenstufe` = numeric(m),
                         check.names = FALSE)

for (i in seq_len(m)) {
  fit <- correlationBF(y = mice::complete(imp, i)$lh_ank,    # correlation
                       x = mice::complete(imp, i)$Klassenstufe)
  kl_results[i, "lh_ank-Klassenstufe"] <- extractBF(fit)$bf  # save BF in data frame

  rm(fit)
}

# show summary of BFs
summary(kl_results[, "lh_ank-Klassenstufe"])

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   2093   18554   19939   17798   20601   27261 

# Density plus jittered points of the m Bayes factors on a log10 axis;
# dashed red lines mark the conventional evidence bounds 1/3 and 3.
ggplot(kl_results, aes(x = `lh_ank-Klassenstufe`)) +
  geom_density(fill = "#b4a069", color = NA, alpha = .3) +
  geom_jitter(aes(y = ""), height = 1, width = 0,
              alpha = .3, size = 2.5, fill = "#b4a069", color = "#b4a069") +
  geom_vline(xintercept = c(1 / 3, 3), color = "red",
             alpha = .4, size = 2, linetype = "dashed") +
  scale_x_continuous(expand = c(0, 0), trans = "log10") +
  scale_y_discrete(expand = c(0, 0)) +
  xlab("BF [log10]") +
  ylab("% Häufigkeit") +
  theme_light() +
  theme(axis.line = element_line(colour = "#696f71"),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        panel.background = element_rect(fill = "#f6f7f7"))

Plot der deskriptiven Daten


###### DESCRIPTIVE PLOT ############################################### #
# plot (non-missing) values as descriptive:
# observed share of lessons with lh_ank per grade level.
lh_ank_p <- p_data %>%
  dplyr::filter(!is.na(lh_ank) & !is.na(Klassenstufe)) %>%
  group_by(Klassenstufe) %>%
  summarize(lh_ank = mean(lh_ank),  # NAs already removed by the filter above
            length_n = n()) %>%     # group size; replaces the mutate(length)/mean(length) detour
  ungroup() %>%
  mutate(Klassenstufe = factor(Klassenstufe,
                               levels = c("1", "1_2", "2", "3", "4", "5", "6",
                                          "7", "8", "9", "10", "11", "12")),
         # grades 1-4 (incl. mixed 1_2) are Grundschule, everything else Gymnasium
         Schulart = as.factor(ifelse(Klassenstufe %in% c("1", "1_2", "2", "3", "4"),
                                     "Grundschule", "Gymnasium")))

ggplot(lh_ank_p, aes(x = Klassenstufe, y = lh_ank, color = Schulart)) +
  geom_line(aes(group = NA), color = "grey", size = 1) +
  geom_point(aes(size = length_n)) +
  # vjust is a constant, so it belongs outside aes()
  geom_text(aes(label = paste(round(lh_ank * 100, 0), "%")), vjust = 3) +
  scale_y_continuous(limits = c(0, 1), expand = c(0, 0)) +
  # background shading: pink = Grundschule grades, blue = Gymnasium grades;
  # NOTE(review): alpha = .01 because the rect layer is drawn once per data row
  geom_rect(aes(xmin = 0, xmax = 4.5, ymin = -Inf, ymax = Inf),
            fill = "pink", alpha = .01, color = NA) +
  geom_rect(aes(xmin = 4.5, xmax = 14, ymin = -Inf, ymax = Inf),
            fill = "#BFEFFF", alpha = .01, color = NA) +
  xlab("Klassenstufe") +
  ylab("% Häufigkeit") +
  labs(size = "Anzahl\neingegangener\nStunden") +
  theme_light() +
  theme(axis.line = element_line(colour = "#696f71"),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        panel.background = element_rect(fill = "#f6f7f7"))

L fordert Aufmerksamkeit

Prädiktor Schulart


# Compute density of BFs
# Bayes factor of the association lh_auf x Schulart for each imputation;
# adds a column to the existing cor_results and overwrites prob_GSminusGY.
for (i in seq_len(m)) {
  imp_m <- matrix(table(mice::complete(imp, i)$lh_auf,
                        mice::complete(imp, i)$Schulart), 2, 2)
  fit <- contingencyTableBF(imp_m, sampleType = "indepMulti",
                            fixedMargin = "cols")  # GS and GY as fixed
  cor_results[i, "lh_auf-Schulart"] <- extractBF(fit)$bf

  chains <- posterior(fit, iterations = 1000)  # get posterior probabilities
  showMerkm_givenGS <- chains[, "pi[2,1]"] / chains[, "pi[*,1]"]  # compute ratio for GS
  showMerkm_givenGY <- chains[, "pi[2,2]"] / chains[, "pi[*,2]"]  # compute ratio for GY
  # mean of the per-draw differences (redundant mcmc() coercion removed)
  cor_results[i, "prob_GSminusGY"] <- mean(showMerkm_givenGS - showMerkm_givenGY)

  rm(imp_m, fit, chains)
}

summary(cor_results[, "lh_auf-Schulart"])

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  11132   71353   85423  131640  159914  774708 

# Density plus jittered points of the m Bayes factors on a log10 axis;
# dashed red lines mark the conventional evidence bounds 1/3 and 3.
ggplot(cor_results, aes(x = `lh_auf-Schulart`)) +
  geom_density(fill = "#b4a069", color = NA, alpha = .3) +
  geom_jitter(aes(y = ""), height = 1, width = 0,
              alpha = .3, size = 2.5, fill = "#b4a069", color = "#b4a069") +
  geom_vline(xintercept = c(1 / 3, 3), color = "red",
             alpha = .4, size = 2, linetype = "dashed") +
  scale_x_continuous(expand = c(0, 0), trans = "log10") +
  scale_y_discrete(expand = c(0, 0)) +
  xlab("BF [log10]") +
  ylab("% Häufigkeit") +
  theme_light() +
  theme(axis.line = element_line(colour = "#696f71"),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        panel.background = element_rect(fill = "#f6f7f7"))


# summary over imputations of the posterior mean difference (GS minus GY)
# of the conditional ratios computed in the loop above
summary(cor_results[,"prob_GSminusGY"])

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
 0.1999  0.2129  0.2166  0.2169  0.2208  0.2352 

Prädiktor Klassenstufe


# Compute density of BFs
# Bayes factor of the correlation lh_auf x Klassenstufe for each imputation;
# the result column is preallocated instead of grown inside the loop.
kl_results <- data.frame(`lh_auf-Klassenstufe` = numeric(m),
                         check.names = FALSE)

for (i in seq_len(m)) {
  fit <- correlationBF(y = mice::complete(imp, i)$lh_auf,    # correlation
                       x = mice::complete(imp, i)$Klassenstufe)
  kl_results[i, "lh_auf-Klassenstufe"] <- extractBF(fit)$bf  # save BF in data frame

  rm(fit)
}

# show summary of BFs
summary(kl_results[, "lh_auf-Klassenstufe"])

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  88811  596357 1085965 1381383 1685379 6867900 

# Density plus jittered points of the m Bayes factors on a log10 axis;
# dashed red lines mark the conventional evidence bounds 1/3 and 3.
ggplot(kl_results, aes(x = `lh_auf-Klassenstufe`)) +
  geom_density(fill = "#b4a069", color = NA, alpha = .3) +
  geom_jitter(aes(y = ""), height = 1, width = 0,
              alpha = .3, size = 2.5, fill = "#b4a069", color = "#b4a069") +
  geom_vline(xintercept = c(1 / 3, 3), color = "red",
             alpha = .4, size = 2, linetype = "dashed") +
  scale_x_continuous(expand = c(0, 0), trans = "log10") +
  scale_y_discrete(expand = c(0, 0)) +
  xlab("BF [log10]") +
  ylab("% Häufigkeit") +
  theme_light() +
  theme(axis.line = element_line(colour = "#696f71"),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        panel.background = element_rect(fill = "#f6f7f7"))

Plot der deskriptiven Daten


###### DESCRIPTIVE PLOT ############################################### #
# Observed share of lessons with lh_auf per grade level (non-missing only).
lh_auf_p <- p_data %>%
  dplyr::filter(!is.na(lh_auf) & !is.na(Klassenstufe)) %>%
  group_by(Klassenstufe) %>%
  summarize(lh_auf = mean(lh_auf),  # NAs already removed by the filter above
            length_n = n()) %>%     # group size; replaces the mutate(length)/mean(length) detour
  ungroup() %>%
  mutate(Klassenstufe = factor(Klassenstufe,
                               levels = c("1", "1_2", "2", "3", "4", "5", "6",
                                          "7", "8", "9", "10", "11", "12")),
         # grades 1-4 (incl. mixed 1_2) are Grundschule, everything else Gymnasium
         Schulart = as.factor(ifelse(Klassenstufe %in% c("1", "1_2", "2", "3", "4"),
                                     "Grundschule", "Gymnasium")))

ggplot(lh_auf_p, aes(x = Klassenstufe, y = lh_auf, color = Schulart)) +
  geom_line(aes(group = NA), color = "grey", size = 1) +
  geom_point(aes(size = length_n)) +
  # vjust is a constant, so it belongs outside aes()
  geom_text(aes(label = paste(round(lh_auf * 100, 0), "%")), vjust = 3) +
  scale_y_continuous(limits = c(0, 1), expand = c(0, 0)) +
  # background shading: pink = Grundschule grades, blue = Gymnasium grades;
  # NOTE(review): alpha = .01 because the rect layer is drawn once per data row
  geom_rect(aes(xmin = 0, xmax = 4.5, ymin = -Inf, ymax = Inf),
            fill = "pink", alpha = .01, color = NA) +
  geom_rect(aes(xmin = 4.5, xmax = 14, ymin = -Inf, ymax = Inf),
            fill = "#BFEFFF", alpha = .01, color = NA) +
  xlab("Klassenstufe") +
  ylab("% Häufigkeit") +
  labs(size = "Anzahl\neingegangener\nStunden") +
  theme_light() +
  theme(axis.line = element_line(colour = "#696f71"),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        panel.background = element_rect(fill = "#f6f7f7"))

L nennt HA

Prädiktor Schulart


# Compute density of BFs
# Bayes factor of the association lh_nen x Schulart for each imputation;
# adds a column to the existing cor_results and overwrites prob_GSminusGY.
for (i in seq_len(m)) {
  imp_m <- matrix(table(mice::complete(imp, i)$lh_nen,
                        mice::complete(imp, i)$Schulart), 2, 2)
  fit <- contingencyTableBF(imp_m, sampleType = "indepMulti",
                            fixedMargin = "cols")  # GS and GY as fixed
  cor_results[i, "lh_nen-Schulart"] <- extractBF(fit)$bf

  chains <- posterior(fit, iterations = 1000)  # get posterior probabilities
  showMerkm_givenGS <- chains[, "pi[2,1]"] / chains[, "pi[*,1]"]  # compute ratio for GS
  showMerkm_givenGY <- chains[, "pi[2,2]"] / chains[, "pi[*,2]"]  # compute ratio for GY
  # mean of the per-draw differences (redundant mcmc() coercion removed)
  cor_results[i, "prob_GSminusGY"] <- mean(showMerkm_givenGS - showMerkm_givenGY)

  rm(imp_m, fit, chains)
}

summary(cor_results[, "lh_nen-Schulart"])

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
0.05133 0.05412 0.06194 0.08576 0.07829 3.59638 

# Density plus jittered points of the m Bayes factors on a log10 axis;
# dashed red lines mark the conventional evidence bounds 1/3 and 3.
ggplot(cor_results, aes(x = `lh_nen-Schulart`)) +
  geom_density(fill = "#b4a069", color = NA, alpha = .3) +
  geom_jitter(aes(y = ""), height = 1, width = 0,
              alpha = .3, size = 2.5, fill = "#b4a069", color = "#b4a069") +
  geom_vline(xintercept = c(1 / 3, 3), color = "red",
             alpha = .4, size = 2, linetype = "dashed") +
  scale_x_continuous(expand = c(0, 0), trans = "log10") +
  scale_y_discrete(expand = c(0, 0)) +
  xlab("BF [log10]") +
  ylab("% Häufigkeit") +
  theme_light() +
  theme(axis.line = element_line(colour = "#696f71"),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        panel.background = element_rect(fill = "#f6f7f7"))


# summary over imputations of the posterior mean difference (GS minus GY)
# of the conditional ratios computed in the loop above
summary(cor_results[,"prob_GSminusGY"])

     Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
-0.033835 -0.012622 -0.003918 -0.001079  0.009004  0.071875 

Prädiktor Klassenstufe


# Compute density of BFs
# Bayes factor of the correlation lh_nen x Klassenstufe for each imputation;
# the result column is preallocated instead of grown inside the loop.
kl_results <- data.frame(`lh_nen-Klassenstufe` = numeric(m),
                         check.names = FALSE)

for (i in seq_len(m)) {
  fit <- correlationBF(y = mice::complete(imp, i)$lh_nen,    # correlation
                       x = mice::complete(imp, i)$Klassenstufe)
  kl_results[i, "lh_nen-Klassenstufe"] <- extractBF(fit)$bf  # save BF in data frame

  rm(fit)
}

# show summary of BFs
summary(kl_results[, "lh_nen-Klassenstufe"])

    Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
  0.1036   0.1111   0.1391   0.8039   0.2199 504.1839 

# Density plus jittered points of the m Bayes factors on a log10 axis;
# dashed red lines mark the conventional evidence bounds 1/3 and 3.
ggplot(kl_results, aes(x = `lh_nen-Klassenstufe`)) +
  geom_density(fill = "#b4a069", color = NA, alpha = .3) +
  geom_jitter(aes(y = ""), height = 1, width = 0,
              alpha = .3, size = 2.5, fill = "#b4a069", color = "#b4a069") +
  geom_vline(xintercept = c(1 / 3, 3), color = "red",
             alpha = .4, size = 2, linetype = "dashed") +
  scale_x_continuous(expand = c(0, 0), trans = "log10") +
  scale_y_discrete(expand = c(0, 0)) +
  xlab("BF [log10]") +
  ylab("% Häufigkeit") +
  theme_light() +
  theme(axis.line = element_line(colour = "#696f71"),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        panel.background = element_rect(fill = "#f6f7f7"))

Plot der deskriptiven Daten


###### DESCRIPTIVE PLOT ############################################### #
# Observed share of lessons with lh_nen per grade level (non-missing only).
lh_nen_p <- p_data %>%
  dplyr::filter(!is.na(lh_nen) & !is.na(Klassenstufe)) %>%
  group_by(Klassenstufe) %>%
  summarize(lh_nen = mean(lh_nen),  # NAs already removed by the filter above
            length_n = n()) %>%     # group size; replaces the mutate(length)/mean(length) detour
  ungroup() %>%
  mutate(Klassenstufe = factor(Klassenstufe,
                               levels = c("1", "1_2", "2", "3", "4", "5", "6",
                                          "7", "8", "9", "10", "11", "12")),
         # grades 1-4 (incl. mixed 1_2) are Grundschule, everything else Gymnasium
         Schulart = as.factor(ifelse(Klassenstufe %in% c("1", "1_2", "2", "3", "4"),
                                     "Grundschule", "Gymnasium")))

ggplot(lh_nen_p, aes(x = Klassenstufe, y = lh_nen, color = Schulart)) +
  geom_line(aes(group = NA), color = "grey", size = 1) +
  geom_point(aes(size = length_n)) +
  # vjust is a constant, so it belongs outside aes()
  geom_text(aes(label = paste(round(lh_nen * 100, 0), "%")), vjust = 3) +
  scale_y_continuous(limits = c(0, 1), expand = c(0, 0)) +
  # background shading: pink = Grundschule grades, blue = Gymnasium grades;
  # NOTE(review): alpha = .01 because the rect layer is drawn once per data row
  geom_rect(aes(xmin = 0, xmax = 4.5, ymin = -Inf, ymax = Inf),
            fill = "pink", alpha = .01, color = NA) +
  geom_rect(aes(xmin = 4.5, xmax = 14, ymin = -Inf, ymax = Inf),
            fill = "#BFEFFF", alpha = .01, color = NA) +
  xlab("Klassenstufe") +
  ylab("% Häufigkeit") +
  labs(size = "Anzahl\neingegangener\nStunden") +
  theme_light() +
  theme(axis.line = element_line(colour = "#696f71"),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        panel.background = element_rect(fill = "#f6f7f7"))

L schreibt HA an

Prädiktor Schulart


# Compute density of BFs
# Bayes factor of the association lh_sch x Schulart for each imputation;
# adds a column to the existing cor_results and overwrites prob_GSminusGY.
for (i in seq_len(m)) {
  imp_m <- matrix(table(mice::complete(imp, i)$lh_sch,
                        mice::complete(imp, i)$Schulart), 2, 2)
  fit <- contingencyTableBF(imp_m, sampleType = "indepMulti",
                            fixedMargin = "cols")  # GS and GY as fixed
  cor_results[i, "lh_sch-Schulart"] <- extractBF(fit)$bf

  chains <- posterior(fit, iterations = 1000)  # get posterior probabilities
  showMerkm_givenGS <- chains[, "pi[2,1]"] / chains[, "pi[*,1]"]  # compute ratio for GS
  showMerkm_givenGY <- chains[, "pi[2,2]"] / chains[, "pi[*,2]"]  # compute ratio for GY
  # mean of the per-draw differences (redundant mcmc() coercion removed)
  cor_results[i, "prob_GSminusGY"] <- mean(showMerkm_givenGS - showMerkm_givenGY)

  rm(imp_m, fit, chains)
}

summary(cor_results[, "lh_sch-Schulart"])

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
 0.4367  0.6171  0.7092  0.7208  0.8198  1.4633 

# Density plus jittered points of the m Bayes factors on a log10 axis;
# dashed red lines mark the conventional evidence bounds 1/3 and 3.
ggplot(cor_results, aes(x = `lh_sch-Schulart`)) +
  geom_density(fill = "#b4a069", color = NA, alpha = .3) +
  geom_jitter(aes(y = ""), height = 1, width = 0,
              alpha = .3, size = 2.5, fill = "#b4a069", color = "#b4a069") +
  geom_vline(xintercept = c(1 / 3, 3), color = "red",
             alpha = .4, size = 2, linetype = "dashed") +
  scale_x_continuous(expand = c(0, 0), trans = "log10") +
  scale_y_discrete(expand = c(0, 0)) +
  xlab("BF [log10]") +
  ylab("% Häufigkeit") +
  theme_light() +
  theme(axis.line = element_line(colour = "#696f71"),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        panel.background = element_rect(fill = "#f6f7f7"))


# summary over imputations of the posterior mean difference (GS minus GY)
# of the conditional ratios computed in the loop above
summary(cor_results[,"prob_GSminusGY"])

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
0.06973 0.07983 0.08320 0.08339 0.08661 0.09953 

Prädiktor Klassenstufe


# Compute density of BFs
# Bayes factor of the correlation lh_sch x Klassenstufe for each imputation;
# the result column is preallocated instead of grown inside the loop.
kl_results <- data.frame(`lh_sch-Klassenstufe` = numeric(m),
                         check.names = FALSE)

for (i in seq_len(m)) {
  fit <- correlationBF(y = mice::complete(imp, i)$lh_sch,    # correlation
                       x = mice::complete(imp, i)$Klassenstufe)
  kl_results[i, "lh_sch-Klassenstufe"] <- extractBF(fit)$bf  # save BF in data frame

  rm(fit)
}

# show summary of BFs
summary(kl_results[, "lh_sch-Klassenstufe"])

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
 0.1986  0.2669  0.2969  0.3188  0.3514  0.6692 

# Density plus jittered points of the m Bayes factors on a log10 axis;
# dashed red lines mark the conventional evidence bounds 1/3 and 3.
ggplot(kl_results, aes(x = `lh_sch-Klassenstufe`)) +
  geom_density(fill = "#b4a069", color = NA, alpha = .3) +
  geom_jitter(aes(y = ""), height = 1, width = 0,
              alpha = .3, size = 2.5, fill = "#b4a069", color = "#b4a069") +
  geom_vline(xintercept = c(1 / 3, 3), color = "red",
             alpha = .4, size = 2, linetype = "dashed") +
  scale_x_continuous(expand = c(0, 0), trans = "log10") +
  scale_y_discrete(expand = c(0, 0)) +
  xlab("BF [log10]") +
  ylab("% Häufigkeit") +
  theme_light() +
  theme(axis.line = element_line(colour = "#696f71"),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        panel.background = element_rect(fill = "#f6f7f7"))

Plot der deskriptiven Daten


###### DESCRIPTIVE PLOT ############################################### #
# Observed share of lessons with lh_sch per grade level (non-missing only).
lh_sch_p <- p_data %>%
  dplyr::filter(!is.na(lh_sch) & !is.na(Klassenstufe)) %>%
  group_by(Klassenstufe) %>%
  summarize(lh_sch = mean(lh_sch),  # NAs already removed by the filter above
            length_n = n()) %>%     # group size; replaces the mutate(length)/mean(length) detour
  ungroup() %>%
  mutate(Klassenstufe = factor(Klassenstufe,
                               levels = c("1", "1_2", "2", "3", "4", "5", "6",
                                          "7", "8", "9", "10", "11", "12")),
         # grades 1-4 (incl. mixed 1_2) are Grundschule, everything else Gymnasium
         Schulart = as.factor(ifelse(Klassenstufe %in% c("1", "1_2", "2", "3", "4"),
                                     "Grundschule", "Gymnasium")))

ggplot(lh_sch_p, aes(x = Klassenstufe, y = lh_sch, color = Schulart)) +
  geom_line(aes(group = NA), color = "grey", size = 1) +
  geom_point(aes(size = length_n)) +
  # vjust is a constant, so it belongs outside aes()
  geom_text(aes(label = paste(round(lh_sch * 100, 0), "%")), vjust = 3) +
  scale_y_continuous(limits = c(0, 1), expand = c(0, 0)) +
  # background shading: pink = Grundschule grades, blue = Gymnasium grades;
  # NOTE(review): alpha = .01 because the rect layer is drawn once per data row
  geom_rect(aes(xmin = 0, xmax = 4.5, ymin = -Inf, ymax = Inf),
            fill = "pink", alpha = .01, color = NA) +
  geom_rect(aes(xmin = 4.5, xmax = 14, ymin = -Inf, ymax = Inf),
            fill = "#BFEFFF", alpha = .01, color = NA) +
  xlab("Klassenstufe") +
  ylab("% Häufigkeit") +
  labs(size = "Anzahl\neingegangener\nStunden") +
  theme_light() +
  theme(axis.line = element_line(colour = "#696f71"),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        panel.background = element_rect(fill = "#f6f7f7"))

L erläutert HA

Prädiktor Schulart


# Compute density of BFs
# Bayes factor of the association lh_erl x Schulart for each imputation;
# adds a column to the existing cor_results and overwrites prob_GSminusGY.
for (i in seq_len(m)) {
  imp_m <- matrix(table(mice::complete(imp, i)$lh_erl,
                        mice::complete(imp, i)$Schulart), 2, 2)
  fit <- contingencyTableBF(imp_m, sampleType = "indepMulti",
                            fixedMargin = "cols")  # GS and GY as fixed
  cor_results[i, "lh_erl-Schulart"] <- extractBF(fit)$bf

  chains <- posterior(fit, iterations = 1000)  # get posterior probabilities
  showMerkm_givenGS <- chains[, "pi[2,1]"] / chains[, "pi[*,1]"]  # compute ratio for GS
  showMerkm_givenGY <- chains[, "pi[2,2]"] / chains[, "pi[*,2]"]  # compute ratio for GY
  # mean of the per-draw differences (redundant mcmc() coercion removed)
  cor_results[i, "prob_GSminusGY"] <- mean(showMerkm_givenGS - showMerkm_givenGY)

  rm(imp_m, fit, chains)
}

summary(cor_results[, "lh_erl-Schulart"])

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
 0.1248  0.1411  0.1488  0.1512  0.1578  0.1906 

# Density plus jittered points of the m Bayes factors on a log10 axis;
# dashed red lines mark the conventional evidence bounds 1/3 and 3.
ggplot(cor_results, aes(x = `lh_erl-Schulart`)) +
  geom_density(fill = "#b4a069", color = NA, alpha = .3) +
  geom_jitter(aes(y = ""), height = 1, width = 0,
              alpha = .3, size = 2.5, fill = "#b4a069", color = "#b4a069") +
  geom_vline(xintercept = c(1 / 3, 3), color = "red",
             alpha = .4, size = 2, linetype = "dashed") +
  scale_x_continuous(expand = c(0, 0), trans = "log10") +
  scale_y_discrete(expand = c(0, 0)) +
  xlab("BF [log10]") +
  ylab("% Häufigkeit") +
  theme_light() +
  theme(axis.line = element_line(colour = "#696f71"),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        panel.background = element_rect(fill = "#f6f7f7"))


# summarize the posterior difference P(feature | GS) - P(feature | GY)
# across the m imputations (column was filled by the loop above)
summary(cor_results[,"prob_GSminusGY"])

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
0.02237 0.03208 0.03535 0.03530 0.03829 0.04922 

Prädiktor Klassenstufe


# Compute density of BFs
# One Bayes factor per imputed data set for the correlation between
# lh_erl and Klassenstufe (grade level is treated as numeric here).
kl_results <- data.frame()

# seq_len(m) instead of 1:m: avoids the c(1, 0) footgun should m ever be 0
for (i in seq_len(m)) {
  fit <- correlationBF(y = mice::complete(imp, i)$lh_erl,    # correlation
                       x = mice::complete(imp, i)$Klassenstufe)
  kl_results[i, "lh_erl-Klassenstufe"] <- extractBF(fit)$bf  # save BF in data frame
  
  rm(fit)
}

# show summary of BFs
summary(kl_results[, "lh_erl-Klassenstufe"])

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
 0.1253  0.1460  0.1560  0.1595  0.1697  0.2328 

# plot BFs distribution
# (log10 axis; dashed red lines mark the evidence thresholds 1/3 and 3)
ggplot(kl_results, aes(x = `lh_erl-Klassenstufe`)) +
                geom_density(alpha = .3, fill = "#b4a069", color = NA) +
                geom_jitter(aes(x = `lh_erl-Klassenstufe`, y=""), 
                            height = 1, width = 0, alpha = .3, 
                            size = 2.5, fill = "#b4a069", color = "#b4a069") +
                geom_vline(xintercept = (1/3), color = "red", alpha = .4, 
                           size = 2, linetype = "dashed") +
                geom_vline(xintercept = 3, color = "red", alpha = .4, 
                           size = 2, linetype = "dashed") +
                scale_x_continuous(expand = c(0, 0), trans = 'log10') +
                scale_y_discrete(expand = c(0, 0)) +
                ylab("% Häufigkeit") +
                xlab("BF [log10]") +
                theme_light() +
                theme(axis.line = element_line(colour = "#696f71"),
                      panel.grid.major = element_blank(),
                      panel.grid.minor = element_blank(),
                      panel.border = element_blank(),
                      panel.background = element_rect(fill = "#f6f7f7"))

Plot der deskriptiven Daten


###### DESCRIPTIVE PLOT ############################################### #

# Mean relative frequency of lh_erl per grade level, plus the number of
# observed lessons per grade (used as point size in the plot below).
lh_erl_p <- p_data %>%
  dplyr::filter(!is.na(lh_erl) & !is.na(Klassenstufe)) %>%
  group_by(Klassenstufe) %>%
  summarize(lh_erl = mean(lh_erl, na.rm = TRUE),  # TRUE, not reassignable T
            length_n = n()) %>%                   # group size directly; replaces mutate(length=…)/mean(length)
  ungroup() %>%
  mutate(Klassenstufe = factor(Klassenstufe, levels = c("1", "1_2", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12")),
         # grades 1-4 (incl. mixed class "1_2") = primary school, rest = Gymnasium
         Schulart = as.factor(case_when(
                      Klassenstufe %in% c("1", "1_2", "2", "3", "4") ~ "Grundschule",
                      TRUE ~ "Gymnasium"
         )))

# Descriptive plot: mean share of lessons with lh_erl per grade; point size =
# number of observed lessons; background shading separates GS (pink) and GY (blue).
ggplot(lh_erl_p, aes(x = Klassenstufe, y = lh_erl, color = Schulart)) + 
  geom_line(aes(group = NA), color = "grey", size = 1) +
  geom_point(aes(size = length_n)) +
  # NOTE(review): vjust is a constant and would conventionally sit outside aes()
  geom_text(aes(label = paste(round(lh_erl*100,0), "%"), vjust = 3)) +
  scale_y_continuous(limits = c(0,1), expand = c(0,0)) +
  # NOTE(review): constant rect coords inside aes() draw one rectangle per data
  # row; the very low alpha (.01) appears to rely on that overplotting — confirm
  geom_rect(aes(xmin = 0, xmax = 4.5, ymin = -Inf, ymax = Inf), fill = "pink", alpha = .01, color = NA) +
  geom_rect(aes(xmin = 4.5, xmax = 14, ymin = -Inf, ymax = Inf), fill = "#BFEFFF", alpha = .01, color = NA) +
  xlab("Klassenstufe") +
  ylab("% Häufigkeit") +
  labs(size = "Anzahl\neingegangener\nStunden") +
  theme_light() +
                theme(axis.line = element_line(colour = "#696f71"),
                      panel.grid.major = element_blank(),
                      panel.grid.minor = element_blank(),
                      panel.border = element_blank(),
                      panel.background = element_rect(fill = "#f6f7f7")) 

L will Fragen

Prädiktor Schulart


# Compute density of BFs
# Same procedure as for lh_erl: per imputation, 2x2 table of lh_wfr x Schulart,
# contingency-table BF (column margins fixed), and the posterior difference
# P(feature | GS) - P(feature | GY).
for (i in 1:m) {
  imp_m <- matrix(table(mice::complete(imp, i)$lh_wfr, mice::complete(imp, i)$Schulart), 2, 2)
  fit <- contingencyTableBF(imp_m, sampleType="indepMulti",fixedMargin = "cols")
  cor_results[i,"lh_wfr-Schulart"] <- extractBF(fit)$bf
    
  chains <- posterior(fit, iterations = 1000)  # get posterior probabilities
  showMerkm_givenGS <- chains[,"pi[2,1]"] / chains[,"pi[*,1]"]  # P(feature | GS); assumes row 2 = feature shown — TODO confirm
  showMerkm_givenGY <- chains[,"pi[2,2]"] / chains[,"pi[*,2]"]  # P(feature | GY)
  cor_results[i,"prob_GSminusGY"] <- mean(mcmc(showMerkm_givenGS - showMerkm_givenGY)) # posterior mean of the GS-GY difference (column overwritten per section)
  
  rm(imp_m, fit, chains)
}

summary(cor_results[,"lh_wfr-Schulart"])

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
 0.1198  0.1420  0.1530  0.1509  0.1530  0.1976 

# Distribution of the m Bayes factors on a log10 axis; dashed red lines mark
# the evidence thresholds 1/3 and 3.
ggplot(cor_results, aes(x = `lh_wfr-Schulart`)) +
                geom_density(alpha = .3, fill = "#b4a069", color = NA) +
                geom_jitter(aes(x = `lh_wfr-Schulart`, y=""), height = 1, width = 0, alpha = .3, size = 2.5, fill = "#b4a069", color = "#b4a069") +
                geom_vline(xintercept = (1/3), color = "red", alpha = .4, size = 2, linetype = "dashed") +
                geom_vline(xintercept = 3, color = "red", alpha = .4, size = 2, linetype = "dashed") +
                scale_x_continuous(expand = c(0, 0), trans = 'log10') +
                scale_y_discrete(expand = c(0, 0)) +
                ylab("% Häufigkeit") +
                xlab("BF [log10]") +
                theme_light() +
                theme(axis.line = element_line(colour = "#696f71"),
                      panel.grid.major = element_blank(),
                      panel.grid.minor = element_blank(),
                      panel.border = element_blank(),
                      panel.background = element_rect(fill = "#f6f7f7"))


# summarize the posterior difference P(feature | GS) - P(feature | GY)
# across the m imputations (column was filled by the loop above)
summary(cor_results[,"prob_GSminusGY"])

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
0.02354 0.03273 0.03522 0.03525 0.03765 0.04722 

Prädiktor Klassenstufe


# Compute density of BFs
# One Bayes factor per imputed data set for the correlation between
# lh_wfr and Klassenstufe (grade level is treated as numeric here).
kl_results <- data.frame()

# seq_len(m) instead of 1:m: avoids the c(1, 0) footgun should m ever be 0
for (i in seq_len(m)) {
  fit <- correlationBF(y = mice::complete(imp, i)$lh_wfr,    # correlation
                       x = mice::complete(imp, i)$Klassenstufe)
  kl_results[i, "lh_wfr-Klassenstufe"] <- extractBF(fit)$bf  # save BF in data frame
  
  rm(fit)
}

# show summary of BFs
summary(kl_results[, "lh_wfr-Klassenstufe"])

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
 0.1036  0.1076  0.1102  0.1106  0.1132  0.1267 

# plot BFs distribution
# (log10 axis; dashed red lines mark the evidence thresholds 1/3 and 3)
ggplot(kl_results, aes(x = `lh_wfr-Klassenstufe`)) +
                geom_density(alpha = .3, fill = "#b4a069", color = NA) +
                geom_jitter(aes(x = `lh_wfr-Klassenstufe`, y=""), 
                            height = 1, width = 0, alpha = .3, 
                            size = 2.5, fill = "#b4a069", color = "#b4a069") +
                geom_vline(xintercept = (1/3), color = "red", alpha = .4, 
                           size = 2, linetype = "dashed") +
                geom_vline(xintercept = 3, color = "red", alpha = .4, 
                           size = 2, linetype = "dashed") +
                scale_x_continuous(expand = c(0, 0), trans = 'log10') +
                scale_y_discrete(expand = c(0, 0)) +
                ylab("% Häufigkeit") +
                xlab("BF [log10]") +
                theme_light() +
                theme(axis.line = element_line(colour = "#696f71"),
                      panel.grid.major = element_blank(),
                      panel.grid.minor = element_blank(),
                      panel.border = element_blank(),
                      panel.background = element_rect(fill = "#f6f7f7"))

Plot der deskriptiven Daten


###### DESCRIPTIVE PLOT ############################################### #

# Mean relative frequency of lh_wfr per grade level, plus the number of
# observed lessons per grade (used as point size in the plot below).
lh_wfr_p <- p_data %>%
  dplyr::filter(!is.na(lh_wfr) & !is.na(Klassenstufe)) %>%
  group_by(Klassenstufe) %>%
  summarize(lh_wfr = mean(lh_wfr, na.rm = TRUE),  # TRUE, not reassignable T
            length_n = n()) %>%                   # group size directly; replaces mutate(length=…)/mean(length)
  ungroup() %>%
  mutate(Klassenstufe = factor(Klassenstufe, levels = c("1", "1_2", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12")),
         # grades 1-4 (incl. mixed class "1_2") = primary school, rest = Gymnasium
         Schulart = as.factor(case_when(
                      Klassenstufe %in% c("1", "1_2", "2", "3", "4") ~ "Grundschule",
                      TRUE ~ "Gymnasium"
         )))

# Descriptive plot: mean share of lessons with lh_wfr per grade; point size =
# number of observed lessons; background shading separates GS (pink) and GY (blue).
ggplot(lh_wfr_p, aes(x = Klassenstufe, y = lh_wfr, color = Schulart)) + 
  geom_line(aes(group = NA), color = "grey", size = 1) +
  geom_point(aes(size = length_n)) +
  geom_text(aes(label = paste(round(lh_wfr*100,0), "%"), vjust = 3)) +
  scale_y_continuous(limits = c(0,1), expand = c(0,0)) +
  geom_rect(aes(xmin = 0, xmax = 4.5, ymin = -Inf, ymax = Inf), fill = "pink", alpha = .01, color = NA) +
  geom_rect(aes(xmin = 4.5, xmax = 14, ymin = -Inf, ymax = Inf), fill = "#BFEFFF", alpha = .01, color = NA) +
  xlab("Klassenstufe") +
  ylab("% Häufigkeit") +
  labs(size = "Anzahl\neingegangener\nStunden") +
  theme_light() +
                theme(axis.line = element_line(colour = "#696f71"),
                      panel.grid.major = element_blank(),
                      panel.grid.minor = element_blank(),
                      panel.border = element_blank(),
                      panel.background = element_rect(fill = "#f6f7f7")) 

L will Notation

Prädiktor Schulart


# Compute density of BFs
# Same procedure as for lh_erl: per imputation, 2x2 table of lh_wno x Schulart,
# contingency-table BF (column margins fixed), and the posterior difference
# P(feature | GS) - P(feature | GY).
for (i in 1:m) {
  imp_m <- matrix(table(mice::complete(imp, i)$lh_wno, mice::complete(imp, i)$Schulart), 2, 2)
  fit <- contingencyTableBF(imp_m, sampleType="indepMulti",fixedMargin = "cols")
  cor_results[i,"lh_wno-Schulart"] <- extractBF(fit)$bf
    
  chains <- posterior(fit, iterations = 1000)  # get posterior probabilities
  showMerkm_givenGS <- chains[,"pi[2,1]"] / chains[,"pi[*,1]"]  # P(feature | GS); assumes row 2 = feature shown — TODO confirm
  showMerkm_givenGY <- chains[,"pi[2,2]"] / chains[,"pi[*,2]"]  # P(feature | GY)
  cor_results[i,"prob_GSminusGY"] <- mean(mcmc(showMerkm_givenGS - showMerkm_givenGY)) # posterior mean of the GS-GY difference (column overwritten per section)
  
  rm(imp_m, fit, chains)
}

summary(cor_results[,"lh_wno-Schulart"])

     Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
1.754e+13 5.581e+13 8.324e+13 9.702e+13 1.502e+14 1.502e+14 

# Distribution of the m Bayes factors on a log10 axis; dashed red lines mark
# the evidence thresholds 1/3 and 3.
ggplot(cor_results, aes(x = `lh_wno-Schulart`)) +
                geom_density(alpha = .3, fill = "#b4a069", color = NA) +
                geom_jitter(aes(x = `lh_wno-Schulart`, y=""), height = 1, width = 0, alpha = .3, size = 2.5, fill = "#b4a069", color = "#b4a069") +
                geom_vline(xintercept = (1/3), color = "red", alpha = .4, size = 2, linetype = "dashed") +
                geom_vline(xintercept = 3, color = "red", alpha = .4, size = 2, linetype = "dashed") +
                scale_x_continuous(expand = c(0, 0), trans = 'log10') +
                scale_y_discrete(expand = c(0, 0)) +
                ylab("% Häufigkeit") +
                xlab("BF [log10]") +
                theme_light() +
                theme(axis.line = element_line(colour = "#696f71"),
                      panel.grid.major = element_blank(),
                      panel.grid.minor = element_blank(),
                      panel.border = element_blank(),
                      panel.background = element_rect(fill = "#f6f7f7"))


# summarize the posterior difference P(feature | GS) - P(feature | GY)
# across the m imputations (column was filled by the loop above)
summary(cor_results[,"prob_GSminusGY"])

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
 0.3622  0.3714  0.3738  0.3734  0.3760  0.3803 

Prädiktor Klassenstufe


# Compute density of BFs
# One Bayes factor per imputed data set for the correlation between
# lh_wno and Klassenstufe (grade level is treated as numeric here).
kl_results <- data.frame()

# seq_len(m) instead of 1:m: avoids the c(1, 0) footgun should m ever be 0
for (i in seq_len(m)) {
  fit <- correlationBF(y = mice::complete(imp, i)$lh_wno,    # correlation
                       x = mice::complete(imp, i)$Klassenstufe)
  kl_results[i, "lh_wno-Klassenstufe"] <- extractBF(fit)$bf  # save BF in data frame
  
  rm(fit)
}

# show summary of BFs
summary(kl_results[, "lh_wno-Klassenstufe"])

     Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
2.011e+12 9.610e+12 2.044e+13 2.122e+13 3.116e+13 5.261e+13 

# plot BFs distribution
# (log10 axis; dashed red lines mark the evidence thresholds 1/3 and 3)
ggplot(kl_results, aes(x = `lh_wno-Klassenstufe`)) +
                geom_density(alpha = .3, fill = "#b4a069", color = NA) +
                geom_jitter(aes(x = `lh_wno-Klassenstufe`, y=""), 
                            height = 1, width = 0, alpha = .3, 
                            size = 2.5, fill = "#b4a069", color = "#b4a069") +
                geom_vline(xintercept = (1/3), color = "red", alpha = .4, 
                           size = 2, linetype = "dashed") +
                geom_vline(xintercept = 3, color = "red", alpha = .4, 
                           size = 2, linetype = "dashed") +
                scale_x_continuous(expand = c(0, 0), trans = 'log10') +
                scale_y_discrete(expand = c(0, 0)) +
                ylab("% Häufigkeit") +
                xlab("BF [log10]") +
                theme_light() +
                theme(axis.line = element_line(colour = "#696f71"),
                      panel.grid.major = element_blank(),
                      panel.grid.minor = element_blank(),
                      panel.border = element_blank(),
                      panel.background = element_rect(fill = "#f6f7f7"))

Plot der deskriptiven Daten


###### DESCRIPTIVE PLOT ############################################### #

# Mean relative frequency of lh_wno per grade level, plus the number of
# observed lessons per grade (used as point size in the plot below).
lh_wno_p <- p_data %>%
  dplyr::filter(!is.na(lh_wno) & !is.na(Klassenstufe)) %>%
  group_by(Klassenstufe) %>%
  summarize(lh_wno = mean(lh_wno, na.rm = TRUE),  # TRUE, not reassignable T
            length_n = n()) %>%                   # group size directly; replaces mutate(length=…)/mean(length)
  ungroup() %>%
  mutate(Klassenstufe = factor(Klassenstufe, levels = c("1", "1_2", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12")),
         # grades 1-4 (incl. mixed class "1_2") = primary school, rest = Gymnasium
         Schulart = as.factor(case_when(
                      Klassenstufe %in% c("1", "1_2", "2", "3", "4") ~ "Grundschule",
                      TRUE ~ "Gymnasium"
         )))

# Descriptive plot: mean share of lessons with lh_wno per grade; point size =
# number of observed lessons; background shading separates GS (pink) and GY (blue).
ggplot(lh_wno_p, aes(x = Klassenstufe, y = lh_wno, color = Schulart)) + 
  geom_line(aes(group = NA), color = "grey", size = 1) +
  geom_point(aes(size = length_n)) +
  geom_text(aes(label = paste(round(lh_wno*100,0), "%"), vjust = 3)) +
  scale_y_continuous(limits = c(0,1), expand = c(0,0)) +
  geom_rect(aes(xmin = 0, xmax = 4.5, ymin = -Inf, ymax = Inf), fill = "pink", alpha = .01, color = NA) +
  geom_rect(aes(xmin = 4.5, xmax = 14, ymin = -Inf, ymax = Inf), fill = "#BFEFFF", alpha = .01, color = NA) +
  xlab("Klassenstufe") +
  ylab("% Häufigkeit") +
  labs(size = "Anzahl\neingegangener\nStunden") +
  theme_light() +
                theme(axis.line = element_line(colour = "#696f71"),
                      panel.grid.major = element_blank(),
                      panel.grid.minor = element_blank(),
                      panel.border = element_blank(),
                      panel.background = element_rect(fill = "#f6f7f7")) 

L achtet auf Notation

Prädiktor Schulart


# Compute density of BFs
# Same procedure as for lh_erl: per imputation, 2x2 table of lh_ano x Schulart,
# contingency-table BF (column margins fixed), and the posterior difference
# P(feature | GS) - P(feature | GY).
for (i in 1:m) {
  imp_m <- matrix(table(mice::complete(imp, i)$lh_ano, mice::complete(imp, i)$Schulart), 2, 2)
  fit <- contingencyTableBF(imp_m, sampleType="indepMulti",fixedMargin = "cols")
  cor_results[i,"lh_ano-Schulart"] <- extractBF(fit)$bf
    
  chains <- posterior(fit, iterations = 1000)  # get posterior probabilities
  showMerkm_givenGS <- chains[,"pi[2,1]"] / chains[,"pi[*,1]"]  # P(feature | GS); assumes row 2 = feature shown — TODO confirm
  showMerkm_givenGY <- chains[,"pi[2,2]"] / chains[,"pi[*,2]"]  # P(feature | GY)
  cor_results[i,"prob_GSminusGY"] <- mean(mcmc(showMerkm_givenGS - showMerkm_givenGY)) # posterior mean of the GS-GY difference (column overwritten per section)
  
  rm(imp_m, fit, chains)
}

summary(cor_results[,"lh_ano-Schulart"])

     Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
2.479e+14 4.433e+15 1.396e+16 1.711e+16 2.613e+16 4.685e+16 

# Distribution of the m Bayes factors on a log10 axis; dashed red lines mark
# the evidence thresholds 1/3 and 3.
ggplot(cor_results, aes(x = `lh_ano-Schulart`)) +
                geom_density(alpha = .3, fill = "#b4a069", color = NA) +
                geom_jitter(aes(x = `lh_ano-Schulart`, y=""), height = 1, width = 0, alpha = .3, size = 2.5, fill = "#b4a069", color = "#b4a069") +
                geom_vline(xintercept = (1/3), color = "red", alpha = .4, size = 2, linetype = "dashed") +
                geom_vline(xintercept = 3, color = "red", alpha = .4, size = 2, linetype = "dashed") +
                scale_x_continuous(expand = c(0, 0), trans = 'log10') +
                scale_y_discrete(expand = c(0, 0)) +
                ylab("% Häufigkeit") +
                xlab("BF [log10]") +
                theme_light() +
                theme(axis.line = element_line(colour = "#696f71"),
                      panel.grid.major = element_blank(),
                      panel.grid.minor = element_blank(),
                      panel.border = element_blank(),
                      panel.background = element_rect(fill = "#f6f7f7"))


# summarize the posterior difference P(feature | GS) - P(feature | GY)
# across the m imputations (column was filled by the loop above)
summary(cor_results[,"prob_GSminusGY"])

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
 0.3720  0.3873  0.3912  0.3907  0.3945  0.4010 

Prädiktor Klassenstufe


# Compute density of BFs
# One Bayes factor per imputed data set for the correlation between
# lh_ano and Klassenstufe (grade level is treated as numeric here).
kl_results <- data.frame()

# seq_len(m) instead of 1:m: avoids the c(1, 0) footgun should m ever be 0
for (i in seq_len(m)) {
  fit <- correlationBF(y = mice::complete(imp, i)$lh_ano,    # correlation
                       x = mice::complete(imp, i)$Klassenstufe)
  kl_results[i, "lh_ano-Klassenstufe"] <- extractBF(fit)$bf  # save BF in data frame
  
  rm(fit)
}

# show summary of BFs
summary(kl_results[, "lh_ano-Klassenstufe"])

     Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
1.252e+13 2.947e+14 6.727e+14 9.168e+14 1.349e+15 5.043e+15 

# plot BFs distribution
# (log10 axis; dashed red lines mark the evidence thresholds 1/3 and 3)
ggplot(kl_results, aes(x = `lh_ano-Klassenstufe`)) +
                geom_density(alpha = .3, fill = "#b4a069", color = NA) +
                geom_jitter(aes(x = `lh_ano-Klassenstufe`, y=""), 
                            height = 1, width = 0, alpha = .3, 
                            size = 2.5, fill = "#b4a069", color = "#b4a069") +
                geom_vline(xintercept = (1/3), color = "red", alpha = .4, 
                           size = 2, linetype = "dashed") +
                geom_vline(xintercept = 3, color = "red", alpha = .4, 
                           size = 2, linetype = "dashed") +
                scale_x_continuous(expand = c(0, 0), trans = 'log10') +
                scale_y_discrete(expand = c(0, 0)) +
                ylab("% Häufigkeit") +
                xlab("BF [log10]") +
                theme_light() +
                theme(axis.line = element_line(colour = "#696f71"),
                      panel.grid.major = element_blank(),
                      panel.grid.minor = element_blank(),
                      panel.border = element_blank(),
                      panel.background = element_rect(fill = "#f6f7f7"))

Plot der deskriptiven Daten


###### DESCRIPTIVE PLOT ############################################### #

# Mean relative frequency of lh_ano per grade level, plus the number of
# observed lessons per grade (used as point size in the plot below).
lh_ano_p <- p_data %>%
  dplyr::filter(!is.na(lh_ano) & !is.na(Klassenstufe)) %>%
  group_by(Klassenstufe) %>%
  summarize(lh_ano = mean(lh_ano, na.rm = TRUE),  # TRUE, not reassignable T
            length_n = n()) %>%                   # group size directly; replaces mutate(length=…)/mean(length)
  ungroup() %>%
  mutate(Klassenstufe = factor(Klassenstufe, levels = c("1", "1_2", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12")),
         # grades 1-4 (incl. mixed class "1_2") = primary school, rest = Gymnasium
         Schulart = as.factor(case_when(
                      Klassenstufe %in% c("1", "1_2", "2", "3", "4") ~ "Grundschule",
                      TRUE ~ "Gymnasium"
         )))

# Descriptive plot: mean share of lessons with lh_ano per grade; point size =
# number of observed lessons; background shading separates GS (pink) and GY (blue).
ggplot(lh_ano_p, aes(x = Klassenstufe, y = lh_ano, color = Schulart)) + 
  geom_line(aes(group = NA), color = "grey", size = 1) +
  geom_point(aes(size = length_n)) +
  geom_text(aes(label = paste(round(lh_ano*100,0), "%"), vjust = 3)) +
  scale_y_continuous(limits = c(0,1), expand = c(0,0)) +
  geom_rect(aes(xmin = 0, xmax = 4.5, ymin = -Inf, ymax = Inf), fill = "pink", alpha = .01, color = NA) +
  geom_rect(aes(xmin = 4.5, xmax = 14, ymin = -Inf, ymax = Inf), fill = "#BFEFFF", alpha = .01, color = NA) +
  xlab("Klassenstufe") +
  ylab("% Häufigkeit") +
  labs(size = "Anzahl\neingegangener\nStunden") +
  theme_light() +
                theme(axis.line = element_line(colour = "#696f71"),
                      panel.grid.major = element_blank(),
                      panel.grid.minor = element_blank(),
                      panel.border = element_blank(),
                      panel.background = element_rect(fill = "#f6f7f7")) 

L beantwortet Fragen

Prädiktor Schulart


#### Compute BFs within informed hypotheses framework (bain) ##################################### #
# For the count/continuous outcome lh_bfr: pool group means and their sampling
# variances over the m imputations, then test the informed hypotheses
# Gym = Grund, Gym < Grund, Gym > Grund with bain.

### for bain: compute vcov by hand ###
# create data frame to collect var and cov for each imputation
vcov_df <- data.frame(Grundschule = as.numeric(),
                      Gymnasium = as.numeric()
                      )

mean_df <- data.frame(Grundschule = as.numeric(),
                      Gymnasium = as.numeric()
                      )

# sample size per group
# NOTE(review): mice::complete(imp) without an action argument returns the
# first completed data set — group sizes should be identical across
# imputations, but confirm.
samp_gr <- table(mice::complete(imp)$Schulart)


## loop over all m imputations
for(i in 1:m) {
  # cell-means coding (no intercept): one mean per Schulart
  fit_lm <- lm(lh_bfr ~ Schulart-1, data = mice::complete(imp, i))
  
  var_lm <- summary(fit_lm)$sigma**2 # get variance of the means (VOM)
  vcov_df[i, "Grundschule"] <- var_lm/samp_gr["Grundschule"] # compute VOM per group
  vcov_df[i, "Gymnasium"] <- var_lm/samp_gr["Gymnasium"] # compute VOM per group
  
  # collect estimates of the means
  mean_df[i, "Grundschule"] <- coef(fit_lm)["SchulartGrundschule"]
  mean_df[i, "Gymnasium"] <- coef(fit_lm)["SchulartGymnasium"]
  
  rm(fit_lm, var_lm) # clean up, because we like it tidy in here
}

## make var matrices 
# compute the mean over var (simple pooling of the per-imputation variances)
vcov_df <- vcov_df %>%
        summarize_all(mean)

# create matrices (bain expects one 1x1 covariance matrix per group)
mat1 <- matrix(vcov_df$Grundschule, 1, 1)
mat2 <- matrix(vcov_df$Gymnasium, 1, 1)

variances <- list(mat1, mat2)

## compute mean of estimates (pooled group means across imputations)
bf_data <- mean_df %>%
        summarize_all(mean)
bf_data <- as.numeric(bf_data)
names(bf_data) <- c("Grund", "Gym")

## BAIN ##### #
# generating hypotheses
hypotheses <- "Gym = Grund; Gym < Grund; Gym > Grund"

bf_hyp <- bain(bf_data, 
              hypothesis = hypotheses,
              n = samp_gr,           # size of the groups
              Sigma = variances,     # matrices of residual variances of groups
              group_parameters = 1,  # there is 1 group specific parameter (the mean in each group)
              joint_parameters = 0   # there are no parameters that apply to each of the groups (e.g. the regression coefficient of a covariate)
              )

print(bf_hyp)

Bayesian informative hypothesis testing for an object of class numeric:

   Fit_eq Com_eq Fit_in Com_in Fit   Com   BF     PMPa  PMPb 
H1 2.523  0.145  1.000  1.000  2.523 0.145 17.345 0.897 0.853
H2 1.000  1.000  0.749  0.500  0.749 0.500 2.979  0.077 0.074
H3 1.000  1.000  0.251  0.500  0.251 0.500 0.336  0.026 0.025
Hu                                                      0.049

Hypotheses:
  H1: Gym=Grund
  H2: Gym<Grund
  H3: Gym>Grund

Note: BF denotes the Bayes factor of the hypothesis at hand versus its complement.

print(bf_hyp$BFmatrix)

           H1         H2        H3
H1 1.00000000 11.5835903 34.510626
H2 0.08632902  1.0000000  2.979269
H3 0.02897658  0.3356529  1.000000

Prädiktor Klassenstufe


# Compute density of BFs
# One Bayes factor per imputed data set for the correlation between
# lh_bfr and Klassenstufe (grade level is treated as numeric here).
kl_results <- data.frame()

# seq_len(m) instead of 1:m: avoids the c(1, 0) footgun should m ever be 0
for (i in seq_len(m)) {
  fit <- correlationBF(y = mice::complete(imp, i)$lh_bfr,    # correlation
                       x = mice::complete(imp, i)$Klassenstufe)
  kl_results[i, "lh_bfr-Klassenstufe"] <- extractBF(fit)$bf  # save BF in data frame
  
  rm(fit)
}

# show summary of BFs
summary(kl_results[, "lh_bfr-Klassenstufe"])

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
 0.1036  0.1293  0.1512  0.1561  0.1770  0.3039 

# plot BFs distribution
# (log10 axis; dashed red lines mark the evidence thresholds 1/3 and 3)
ggplot(kl_results, aes(x = `lh_bfr-Klassenstufe`)) +
                geom_density(alpha = .3, fill = "#b4a069", color = NA) +
                geom_jitter(aes(x = `lh_bfr-Klassenstufe`, y=""), 
                            height = 1, width = 0, alpha = .3, 
                            size = 2.5, fill = "#b4a069", color = "#b4a069") +
                geom_vline(xintercept = (1/3), color = "red", alpha = .4, 
                           size = 2, linetype = "dashed") +
                geom_vline(xintercept = 3, color = "red", alpha = .4, 
                           size = 2, linetype = "dashed") +
                scale_x_continuous(expand = c(0, 0), trans = 'log10') +
                scale_y_discrete(expand = c(0, 0)) +
                ylab("% Häufigkeit") +
                xlab("BF [log10]") +
                theme_light() +
                theme(axis.line = element_line(colour = "#696f71"),
                      panel.grid.major = element_blank(),
                      panel.grid.minor = element_blank(),
                      panel.border = element_blank(),
                      panel.background = element_rect(fill = "#f6f7f7"))

Plot der deskriptiven Daten


###### DESCRIPTIVE PLOT ############################################### #

# Mean of lh_bfr per grade level (absolute frequency, not a percentage),
# plus the number of observed lessons per grade (point size in the plot below).
lh_bfr_p <- p_data %>%
  dplyr::filter(!is.na(lh_bfr) & !is.na(Klassenstufe)) %>%
  group_by(Klassenstufe) %>%
  summarize(lh_bfr = mean(lh_bfr, na.rm = TRUE),  # TRUE, not reassignable T
            length_n = n()) %>%                   # group size directly; replaces mutate(length=…)/mean(length)
  ungroup() %>%
  mutate(Klassenstufe = factor(Klassenstufe, levels = c("1", "1_2", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12")),
         # grades 1-4 (incl. mixed class "1_2") = primary school, rest = Gymnasium
         Schulart = as.factor(case_when(
                      Klassenstufe %in% c("1", "1_2", "2", "3", "4") ~ "Grundschule",
                      TRUE ~ "Gymnasium"
         )))

# Descriptive plot: mean absolute frequency of lh_bfr per grade (labels are
# counts, not percentages; y limit scaled to the data); point size = number of
# observed lessons; background shading separates GS (pink) and GY (blue).
ggplot(lh_bfr_p, aes(x = Klassenstufe, y = lh_bfr, color = Schulart)) + 
  geom_line(aes(group = NA), color = "grey", size = 1) +
  geom_point(aes(size = length_n)) +
  geom_text(aes(label = round(lh_bfr, 2), vjust = 3)) +
  scale_y_continuous(limits = c(0,(max(lh_bfr_p$lh_bfr)*1.1)), expand = c(0,0)) +
  geom_rect(aes(xmin = 0, xmax = 4.5, ymin = -Inf, ymax = Inf), fill = "pink", alpha = .01, color = NA) +
  geom_rect(aes(xmin = 4.5, xmax = 14, ymin = -Inf, ymax = Inf), fill = "#BFEFFF", alpha = .01, color = NA) +
  xlab("Klassenstufe") +
  ylab("∅ absolute Häufigkeit") +
  labs(size = "Anzahl\neingegangener\nStunden") +
  theme_light() +
                theme(axis.line = element_line(colour = "#696f71"),
                      panel.grid.major = element_blank(),
                      panel.grid.minor = element_blank(),
                      panel.border = element_blank(),
                      panel.background = element_rect(fill = "#f6f7f7")) 

SuS sind aufmerksam

Prädiktor Schulart


#### Compute BFs within informed hypotheses framework (bain) ##################################### #
# For the Likert outcome sh_auf: pool group means and their sampling
# variances over the m imputations, then test the informed hypotheses
# Gym = Grund, Gym < Grund, Gym > Grund with bain.

### for bain: compute vcov by hand ###
# create data frame to collect var and cov for each imputation
vcov_df <- data.frame(Grundschule = as.numeric(),
                      Gymnasium = as.numeric()
                      )

mean_df <- data.frame(Grundschule = as.numeric(),
                      Gymnasium = as.numeric()
                      )

# sample size per group
# NOTE(review): mice::complete(imp) without an action argument returns the
# first completed data set — group sizes should be identical across
# imputations, but confirm.
samp_gr <- table(mice::complete(imp)$Schulart)


## loop over all m imputations
for(i in 1:m) {
  # cell-means coding (no intercept): one mean per Schulart
  fit_lm <- lm(sh_auf ~ Schulart-1, data = mice::complete(imp, i))
  
  var_lm <- summary(fit_lm)$sigma**2 # get variance of the means (VOM)
  vcov_df[i, "Grundschule"] <- var_lm/samp_gr["Grundschule"] # compute VOM per group
  vcov_df[i, "Gymnasium"] <- var_lm/samp_gr["Gymnasium"] # compute VOM per group
  
  # collect estimates of the means
  mean_df[i, "Grundschule"] <- coef(fit_lm)["SchulartGrundschule"]
  mean_df[i, "Gymnasium"] <- coef(fit_lm)["SchulartGymnasium"]
  
  rm(fit_lm, var_lm) # clean up, because we like it tidy in here
}

## make var matrices 
# compute the mean over var (simple pooling of the per-imputation variances)
vcov_df <- vcov_df %>%
        summarize_all(mean)

# create matrices (bain expects one 1x1 covariance matrix per group)
mat1 <- matrix(vcov_df$Grundschule, 1, 1)
mat2 <- matrix(vcov_df$Gymnasium, 1, 1)

variances <- list(mat1, mat2)

## compute mean of estimates (pooled group means across imputations)
bf_data <- mean_df %>%
        summarize_all(mean)
bf_data <- as.numeric(bf_data)
names(bf_data) <- c("Grund", "Gym")

## BAIN ##### #
# generating hypotheses
hypotheses <- "Gym = Grund; Gym < Grund; Gym > Grund"

bf_hyp <- bain(bf_data, 
              hypothesis = hypotheses,
              n = samp_gr,           # size of the groups
              Sigma = variances,     # matrices of residual variances of groups
              group_parameters = 1,  # there is 1 group specific parameter (the mean in each group)
              joint_parameters = 0   # there are no parameters that apply to each of the groups (e.g. the regression coefficient of a covariate)
              )

print(bf_hyp)

Bayesian informative hypothesis testing for an object of class numeric:

   Fit_eq Com_eq Fit_in Com_in Fit   Com   BF         PMPa  PMPb 
H1 0.000  0.199  1.000  1.000  0.000 0.199 0.001      0.001 0.000
H2 1.000  1.000  1.000  0.500  1.000 0.500 176990.839 0.999 0.666
H3 1.000  1.000  0.000  0.500  0.000 0.500 0.000      0.000 0.000
Hu                                                          0.333

Hypotheses:
  H1: Gym=Grund
  H2: Gym<Grund
  H3: Gym>Grund

Note: BF denotes the Bayes factor of the hypothesis at hand versus its complement.

print(bf_hyp$BFmatrix)

             H1          H2          H3
H1 1.000000e+00 7.07254e-04    125.1775
H2 1.413919e+03 1.00000e+00 176990.8373
H3 7.988658e-03 5.65001e-06      1.0000

Prädiktor Klassenstufe


# Compute density of BFs
# One Bayes factor per imputed data set for the correlation between
# sh_auf and Klassenstufe (grade level is treated as numeric here).
kl_results <- data.frame()

# seq_len(m) instead of 1:m: avoids the c(1, 0) footgun should m ever be 0
for (i in seq_len(m)) {
  fit <- correlationBF(y = mice::complete(imp, i)$sh_auf,    # correlation
                       x = mice::complete(imp, i)$Klassenstufe)
  kl_results[i, "sh_auf-Klassenstufe"] <- extractBF(fit)$bf  # save BF in data frame
  
  rm(fit)
}

# show summary of BFs
summary(kl_results[, "sh_auf-Klassenstufe"])

     Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
2.000e+00 1.440e+03 1.569e+04 1.176e+08 1.690e+05 6.877e+10 

# plot BFs distribution
# (log10 axis; dashed red lines mark the evidence thresholds 1/3 and 3)
ggplot(kl_results, aes(x = `sh_auf-Klassenstufe`)) +
                geom_density(alpha = .3, fill = "#b4a069", color = NA) +
                geom_jitter(aes(x = `sh_auf-Klassenstufe`, y=""), 
                            height = 1, width = 0, alpha = .3, 
                            size = 2.5, fill = "#b4a069", color = "#b4a069") +
                geom_vline(xintercept = (1/3), color = "red", alpha = .4, 
                           size = 2, linetype = "dashed") +
                geom_vline(xintercept = 3, color = "red", alpha = .4, 
                           size = 2, linetype = "dashed") +
                scale_x_continuous(expand = c(0, 0), trans = 'log10') +
                scale_y_discrete(expand = c(0, 0)) +
                ylab("% Häufigkeit") +
                xlab("BF [log10]") +
                theme_light() +
                theme(axis.line = element_line(colour = "#696f71"),
                      panel.grid.major = element_blank(),
                      panel.grid.minor = element_blank(),
                      panel.border = element_blank(),
                      panel.background = element_rect(fill = "#f6f7f7"))

Plot der deskriptiven Daten


###### DESCRIPTIVE PLOT ############################################### #

# Mean Likert agreement for sh_auf per grade level, plus the number of
# observed lessons per grade (used as point size in the plot below).
sh_auf_p <- p_data %>%
  dplyr::filter(!is.na(sh_auf) & !is.na(Klassenstufe)) %>%
  group_by(Klassenstufe) %>%
  summarize(sh_auf = mean(sh_auf, na.rm = TRUE),  # TRUE, not reassignable T
            length_n = n()) %>%                   # group size directly; replaces mutate(length=…)/mean(length)
  ungroup() %>%
  mutate(Klassenstufe = factor(Klassenstufe, levels = c("1", "1_2", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12")),
         # grades 1-4 (incl. mixed class "1_2") = primary school, rest = Gymnasium
         Schulart = as.factor(case_when(
                      Klassenstufe %in% c("1", "1_2", "2", "3", "4") ~ "Grundschule",
                      TRUE ~ "Gymnasium"
         )))

ggplot(sh_auf_p, aes(x = Klassenstufe, y = sh_auf, color = Schulart)) + 
  geom_line(aes(group = NA), color = "grey", size = 1) +
  geom_point(aes(size = length_n)) +
  geom_text(aes(label = round(sh_auf,1), vjust = -3)) +
  scale_y_continuous(limits = c(1,5), expand = c(0,0)) +
  geom_rect(aes(xmin = 0, xmax = 4.5, ymin = -Inf, ymax = Inf), fill = "pink", alpha = .01, color = NA) +
  geom_rect(aes(xmin = 4.5, xmax = 14, ymin = -Inf, ymax = Inf), fill = "#BFEFFF", alpha = .01, color = NA) +
  xlab("Klassenstufe") +
  ylab("Zustimmung (Likert Item)") +
  labs(size = "Anzahl\neingegangener\nStunden") +
  theme_light() +
                theme(axis.line = element_line(colour = "#696f71"),
                      panel.grid.major = element_blank(),
                      panel.grid.minor = element_blank(),
                      panel.border = element_blank(),
                      panel.background = element_rect(fill = "#f6f7f7")) 

SuS melden sich

Prädiktor Schulart


#### Compute BFs within informed hypotheses framework (bain) ##################################### #

### for bain: compute group means and their variances by hand ###
# Preallocate one row per imputation instead of growing the data frames
# inside the loop.
vcov_df <- data.frame(Grundschule = numeric(m),
                      Gymnasium   = numeric(m))
mean_df <- data.frame(Grundschule = numeric(m),
                      Gymnasium   = numeric(m))

# sample size per group
samp_gr <- table(mice::complete(imp)$Schulart)

## loop over all m imputations
for (i in seq_len(m)) {
  # cell-means model: one coefficient per school type, no intercept
  fit_lm <- lm(sh_mel ~ Schulart - 1, data = mice::complete(imp, i))

  var_lm <- summary(fit_lm)$sigma^2  # residual variance
  # variance of the mean (VOM) per group
  vcov_df[i, "Grundschule"] <- var_lm / samp_gr["Grundschule"]
  vcov_df[i, "Gymnasium"]   <- var_lm / samp_gr["Gymnasium"]

  # estimated group means
  mean_df[i, "Grundschule"] <- coef(fit_lm)["SchulartGrundschule"]
  mean_df[i, "Gymnasium"]   <- coef(fit_lm)["SchulartGymnasium"]

  rm(fit_lm, var_lm)  # clean up, because we like it tidy in here
}

## pool across imputations: average the per-imputation variances of the means
vcov_means <- colMeans(vcov_df)

# bain expects one (1x1) covariance matrix per group
mat1 <- matrix(vcov_means["Grundschule"], 1, 1)
mat2 <- matrix(vcov_means["Gymnasium"], 1, 1)
variances <- list(mat1, mat2)

## pool the group means the same way
bf_data <- colMeans(mean_df)
names(bf_data) <- c("Grund", "Gym")

## BAIN ##### #
# hypotheses: equal means / Gymnasium lower / Gymnasium higher
hypotheses <- "Gym = Grund; Gym < Grund; Gym > Grund"

bf_hyp <- bain(bf_data,
               hypothesis = hypotheses,
               n = samp_gr,           # size of the groups
               Sigma = variances,     # matrices of residual variances of groups
               group_parameters = 1,  # 1 group-specific parameter (the mean in each group)
               joint_parameters = 0   # no parameters shared across the groups
               )

print(bf_hyp)

Bayesian informative hypothesis testing for an object of class numeric:

   Fit_eq Com_eq Fit_in Com_in Fit   Com   BF     PMPa  PMPb 
H1 3.695  0.170  1.000  1.000  3.695 0.170 21.714 0.916 0.879
H2 1.000  1.000  0.504  0.500  0.504 0.500 1.018  0.043 0.041
H3 1.000  1.000  0.496  0.500  0.496 0.500 0.983  0.042 0.040
Hu                                                      0.040

Hypotheses:
  H1: Gym=Grund
  H2: Gym<Grund
  H3: Gym>Grund

Note: BF denotes the Bayes factor of the hypothesis at hand versus its complement.

print(bf_hyp$BFmatrix)

           H1         H2        H3
H1 1.00000000 21.5251188 21.906863
H2 0.04645735  1.0000000  1.017735
H3 0.04564779  0.9825742  1.000000

Prädiktor Klassenstufe


# Compute the Bayes factor of the sh_mel-Klassenstufe correlation in each
# of the m imputed data sets (BFs cannot be pooled like estimates, so we
# inspect their distribution over imputations instead).
# vapply replaces the original grow-a-data.frame-in-a-loop pattern and
# guarantees a numeric(1) result per imputation.
kl_results <- data.frame(
  "sh_mel-Klassenstufe" = vapply(seq_len(m), function(i) {
    fit <- correlationBF(y = mice::complete(imp, i)$sh_mel,
                         x = mice::complete(imp, i)$Klassenstufe)
    extractBF(fit)$bf
  }, numeric(1)),
  check.names = FALSE  # keep the "sh_mel-Klassenstufe" column name as-is
)

# show summary of BFs across imputations
summary(kl_results[, "sh_mel-Klassenstufe"])

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
 0.1036  0.1049  0.1091  0.1192  0.1209  0.6346 

# Plot the distribution of Bayes factors across imputations on a log10
# axis; dashed red lines mark the conventional evidence bounds 1/3 and 3.
ggplot(kl_results, aes(x = `sh_mel-Klassenstufe`)) +
  geom_density(fill = "#b4a069", color = NA, alpha = .3) +
  geom_jitter(aes(x = `sh_mel-Klassenstufe`, y = ""),
              width = 0, height = 1,
              size = 2.5, alpha = .3,
              fill = "#b4a069", color = "#b4a069") +
  geom_vline(xintercept = 1/3, linetype = "dashed",
             color = "red", alpha = .4, size = 2) +
  geom_vline(xintercept = 3, linetype = "dashed",
             color = "red", alpha = .4, size = 2) +
  scale_x_continuous(trans = "log10", expand = c(0, 0)) +
  scale_y_discrete(expand = c(0, 0)) +
  labs(x = "BF [log10]", y = "% Häufigkeit") +
  theme_light() +
  theme(axis.line = element_line(colour = "#696f71"),
        panel.background = element_rect(fill = "#f6f7f7"),
        panel.border = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank())

Plot der deskriptiven Daten


###### DESCRIPTIVE PLOT ############################################### #

# Mean absolute frequency (sh_mel) per grade level plus the number of
# observed lessons per grade (length_n, used to size the points below).
# n() replaces the original mutate(length = length(...)) + mean(length)
# detour; %in% replaces the five-branch case_when.
sh_mel_p <- p_data %>%
  dplyr::filter(!is.na(sh_mel) & !is.na(Klassenstufe)) %>%
  group_by(Klassenstufe) %>%
  summarize(sh_mel = mean(sh_mel, na.rm = TRUE),
            length_n = n()) %>%
  ungroup() %>%
  mutate(Klassenstufe = factor(Klassenstufe,
                               levels = c("1", "1_2", "2", "3", "4", "5", "6",
                                          "7", "8", "9", "10", "11", "12")),
         # grades 1-4 (incl. the mixed class "1_2") are primary school
         Schulart = factor(if_else(Klassenstufe %in% c("1", "1_2", "2", "3", "4"),
                                   "Grundschule", "Gymnasium")))

ggplot(sh_mel_p, aes(x = Klassenstufe, y = sh_mel, color = Schulart)) +
  geom_line(aes(group = NA), color = "grey", size = 1) +
  geom_point(aes(size = length_n)) +
  geom_text(aes(label = round(sh_mel, 2), vjust = 3)) +
  scale_y_continuous(limits = c(0, (max(sh_mel_p$sh_mel) * 1.1)), expand = c(0, 0)) +
  # NOTE: the rects are drawn once per data row; the low alpha (.01)
  # accumulates through overplotting into the visible background tint
  geom_rect(aes(xmin = 0, xmax = 4.5, ymin = -Inf, ymax = Inf),
            fill = "pink", alpha = .01, color = NA) +
  geom_rect(aes(xmin = 4.5, xmax = 14, ymin = -Inf, ymax = Inf),
            fill = "#BFEFFF", alpha = .01, color = NA) +
  xlab("Klassenstufe") +
  ylab("∅ absolute Häufigkeit") +
  labs(size = "Anzahl\neingegangener\nStunden") +
  theme_light() +
  theme(axis.line = element_line(colour = "#696f71"),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        panel.background = element_rect(fill = "#f6f7f7"))

SuS fragen

Prädiktor Schulart


#### Compute BFs within informed hypotheses framework (bain) ##################################### #

### for bain: compute group means and their variances by hand ###
# Preallocate one row per imputation instead of growing the data frames
# inside the loop.
vcov_df <- data.frame(Grundschule = numeric(m),
                      Gymnasium   = numeric(m))
mean_df <- data.frame(Grundschule = numeric(m),
                      Gymnasium   = numeric(m))

# sample size per group
samp_gr <- table(mice::complete(imp)$Schulart)

## loop over all m imputations
for (i in seq_len(m)) {
  # cell-means model: one coefficient per school type, no intercept
  fit_lm <- lm(sh_fra ~ Schulart - 1, data = mice::complete(imp, i))

  var_lm <- summary(fit_lm)$sigma^2  # residual variance
  # variance of the mean (VOM) per group
  vcov_df[i, "Grundschule"] <- var_lm / samp_gr["Grundschule"]
  vcov_df[i, "Gymnasium"]   <- var_lm / samp_gr["Gymnasium"]

  # estimated group means
  mean_df[i, "Grundschule"] <- coef(fit_lm)["SchulartGrundschule"]
  mean_df[i, "Gymnasium"]   <- coef(fit_lm)["SchulartGymnasium"]

  rm(fit_lm, var_lm)  # clean up, because we like it tidy in here
}

## pool across imputations: average the per-imputation variances of the means
vcov_means <- colMeans(vcov_df)

# bain expects one (1x1) covariance matrix per group
mat1 <- matrix(vcov_means["Grundschule"], 1, 1)
mat2 <- matrix(vcov_means["Gymnasium"], 1, 1)
variances <- list(mat1, mat2)

## pool the group means the same way
bf_data <- colMeans(mean_df)
names(bf_data) <- c("Grund", "Gym")

## BAIN ##### #
# hypotheses: equal means / Gymnasium lower / Gymnasium higher
hypotheses <- "Gym = Grund; Gym < Grund; Gym > Grund"

bf_hyp <- bain(bf_data,
               hypothesis = hypotheses,
               n = samp_gr,           # size of the groups
               Sigma = variances,     # matrices of residual variances of groups
               group_parameters = 1,  # 1 group-specific parameter (the mean in each group)
               joint_parameters = 0   # no parameters shared across the groups
               )

print(bf_hyp)

Bayesian informative hypothesis testing for an object of class numeric:

   Fit_eq Com_eq Fit_in Com_in Fit   Com   BF     PMPa  PMPb 
H1 1.716  0.133  1.000  1.000  1.716 0.133 12.940 0.866 0.812
H2 1.000  1.000  0.846  0.500  0.846 0.500 5.475  0.113 0.106
H3 1.000  1.000  0.154  0.500  0.154 0.500 0.183  0.021 0.019
Hu                                                      0.063

Hypotheses:
  H1: Gym=Grund
  H2: Gym<Grund
  H3: Gym>Grund

Note: BF denotes the Bayes factor of the hypothesis at hand versus its complement.

print(bf_hyp$BFmatrix)

           H1        H2        H3
H1 1.00000000 7.6514875 41.892639
H2 0.13069354 1.0000000  5.475097
H3 0.02387054 0.1826452  1.000000

Prädiktor Klassenstufe


# Compute the Bayes factor of the sh_fra-Klassenstufe correlation in each
# of the m imputed data sets (BFs cannot be pooled like estimates, so we
# inspect their distribution over imputations instead).
# vapply replaces the original grow-a-data.frame-in-a-loop pattern and
# guarantees a numeric(1) result per imputation.
kl_results <- data.frame(
  "sh_fra-Klassenstufe" = vapply(seq_len(m), function(i) {
    fit <- correlationBF(y = mice::complete(imp, i)$sh_fra,
                         x = mice::complete(imp, i)$Klassenstufe)
    extractBF(fit)$bf
  }, numeric(1)),
  check.names = FALSE  # keep the "sh_fra-Klassenstufe" column name as-is
)

# show summary of BFs across imputations
summary(kl_results[, "sh_fra-Klassenstufe"])

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
 0.1104  0.1962  0.2105  0.2105  0.2251  0.3108 

# Plot the distribution of Bayes factors across imputations on a log10
# axis; dashed red lines mark the conventional evidence bounds 1/3 and 3.
ggplot(kl_results, aes(x = `sh_fra-Klassenstufe`)) +
  geom_density(fill = "#b4a069", color = NA, alpha = .3) +
  geom_jitter(aes(x = `sh_fra-Klassenstufe`, y = ""),
              width = 0, height = 1,
              size = 2.5, alpha = .3,
              fill = "#b4a069", color = "#b4a069") +
  geom_vline(xintercept = 1/3, linetype = "dashed",
             color = "red", alpha = .4, size = 2) +
  geom_vline(xintercept = 3, linetype = "dashed",
             color = "red", alpha = .4, size = 2) +
  scale_x_continuous(trans = "log10", expand = c(0, 0)) +
  scale_y_discrete(expand = c(0, 0)) +
  labs(x = "BF [log10]", y = "% Häufigkeit") +
  theme_light() +
  theme(axis.line = element_line(colour = "#696f71"),
        panel.background = element_rect(fill = "#f6f7f7"),
        panel.border = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank())

Plot der deskriptiven Daten


###### DESCRIPTIVE PLOT ############################################### #

# Mean absolute frequency (sh_fra) per grade level plus the number of
# observed lessons per grade (length_n, used to size the points below).
# n() replaces the original mutate(length = length(...)) + mean(length)
# detour; %in% replaces the five-branch case_when.
sh_fra_p <- p_data %>%
  dplyr::filter(!is.na(sh_fra) & !is.na(Klassenstufe)) %>%
  group_by(Klassenstufe) %>%
  summarize(sh_fra = mean(sh_fra, na.rm = TRUE),
            length_n = n()) %>%
  ungroup() %>%
  mutate(Klassenstufe = factor(Klassenstufe,
                               levels = c("1", "1_2", "2", "3", "4", "5", "6",
                                          "7", "8", "9", "10", "11", "12")),
         # grades 1-4 (incl. the mixed class "1_2") are primary school
         Schulart = factor(if_else(Klassenstufe %in% c("1", "1_2", "2", "3", "4"),
                                   "Grundschule", "Gymnasium")))

ggplot(sh_fra_p, aes(x = Klassenstufe, y = sh_fra, color = Schulart)) +
  geom_line(aes(group = NA), color = "grey", size = 1) +
  geom_point(aes(size = length_n)) +
  geom_text(aes(label = round(sh_fra, 2), vjust = 3)) +
  scale_y_continuous(limits = c(0, (max(sh_fra_p$sh_fra) * 1.1)), expand = c(0, 0)) +
  # NOTE: the rects are drawn once per data row; the low alpha (.01)
  # accumulates through overplotting into the visible background tint
  geom_rect(aes(xmin = 0, xmax = 4.5, ymin = -Inf, ymax = Inf),
            fill = "pink", alpha = .01, color = NA) +
  geom_rect(aes(xmin = 4.5, xmax = 14, ymin = -Inf, ymax = Inf),
            fill = "#BFEFFF", alpha = .01, color = NA) +
  xlab("Klassenstufe") +
  ylab("∅ absolute Häufigkeit") +
  labs(size = "Anzahl\neingegangener\nStunden") +
  theme_light() +
  theme(axis.line = element_line(colour = "#696f71"),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        panel.background = element_rect(fill = "#f6f7f7"))

SuS notieren

Prädiktor Schulart


#### Compute BFs within informed hypotheses framework (bain) ##################################### #

### for bain: compute group means and their variances by hand ###
# Preallocate one row per imputation instead of growing the data frames
# inside the loop.
vcov_df <- data.frame(Grundschule = numeric(m),
                      Gymnasium   = numeric(m))
mean_df <- data.frame(Grundschule = numeric(m),
                      Gymnasium   = numeric(m))

# sample size per group
samp_gr <- table(mice::complete(imp)$Schulart)

## loop over all m imputations
for (i in seq_len(m)) {
  # cell-means model: one coefficient per school type, no intercept
  fit_lm <- lm(sh_not ~ Schulart - 1, data = mice::complete(imp, i))

  var_lm <- summary(fit_lm)$sigma^2  # residual variance
  # variance of the mean (VOM) per group
  vcov_df[i, "Grundschule"] <- var_lm / samp_gr["Grundschule"]
  vcov_df[i, "Gymnasium"]   <- var_lm / samp_gr["Gymnasium"]

  # estimated group means
  mean_df[i, "Grundschule"] <- coef(fit_lm)["SchulartGrundschule"]
  mean_df[i, "Gymnasium"]   <- coef(fit_lm)["SchulartGymnasium"]

  rm(fit_lm, var_lm)  # clean up, because we like it tidy in here
}

## pool across imputations: average the per-imputation variances of the means
vcov_means <- colMeans(vcov_df)

# bain expects one (1x1) covariance matrix per group
mat1 <- matrix(vcov_means["Grundschule"], 1, 1)
mat2 <- matrix(vcov_means["Gymnasium"], 1, 1)
variances <- list(mat1, mat2)

## pool the group means the same way
bf_data <- colMeans(mean_df)
names(bf_data) <- c("Grund", "Gym")

## BAIN ##### #
# hypotheses: equal means / Gymnasium lower / Gymnasium higher
hypotheses <- "Gym = Grund; Gym < Grund; Gym > Grund"

bf_hyp <- bain(bf_data,
               hypothesis = hypotheses,
               n = samp_gr,           # size of the groups
               Sigma = variances,     # matrices of residual variances of groups
               group_parameters = 1,  # 1 group-specific parameter (the mean in each group)
               joint_parameters = 0   # no parameters shared across the groups
               )

print(bf_hyp)

Bayesian informative hypothesis testing for an object of class numeric:

   Fit_eq Com_eq Fit_in Com_in Fit   Com   BF       PMPa  PMPb 
H1 0.000  0.005  1.000  1.000  0.000 0.005 0.068    0.033 0.022
H2 1.000  1.000  1.000  0.500  1.000 0.500 2905.398 0.967 0.652
H3 1.000  1.000  0.000  0.500  0.000 0.500 0.000    0.000 0.000
Hu                                                        0.326

Hypotheses:
  H1: Gym=Grund
  H2: Gym<Grund
  H3: Gym>Grund

Note: BF denotes the Bayes factor of the hypothesis at hand versus its complement.

print(bf_hyp$BFmatrix)

            H1           H2         H3
H1  1.00000000 0.0342085490   99.38945
H2 29.23245880 1.0000000000 2905.39808
H3  0.01006143 0.0003441869    1.00000

Prädiktor Klassenstufe


# Compute the Bayes factor of the sh_not-Klassenstufe correlation in each
# of the m imputed data sets (BFs cannot be pooled like estimates, so we
# inspect their distribution over imputations instead).
# vapply replaces the original grow-a-data.frame-in-a-loop pattern and
# guarantees a numeric(1) result per imputation.
kl_results <- data.frame(
  "sh_not-Klassenstufe" = vapply(seq_len(m), function(i) {
    fit <- correlationBF(y = mice::complete(imp, i)$sh_not,
                         x = mice::complete(imp, i)$Klassenstufe)
    extractBF(fit)$bf
  }, numeric(1)),
  check.names = FALSE  # keep the "sh_not-Klassenstufe" column name as-is
)

# show summary of BFs across imputations
summary(kl_results[, "sh_not-Klassenstufe"])

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  1.362   5.455   8.044   9.964  12.732  44.463 

# Plot the distribution of Bayes factors across imputations on a log10
# axis; dashed red lines mark the conventional evidence bounds 1/3 and 3.
ggplot(kl_results, aes(x = `sh_not-Klassenstufe`)) +
  geom_density(fill = "#b4a069", color = NA, alpha = .3) +
  geom_jitter(aes(x = `sh_not-Klassenstufe`, y = ""),
              width = 0, height = 1,
              size = 2.5, alpha = .3,
              fill = "#b4a069", color = "#b4a069") +
  geom_vline(xintercept = 1/3, linetype = "dashed",
             color = "red", alpha = .4, size = 2) +
  geom_vline(xintercept = 3, linetype = "dashed",
             color = "red", alpha = .4, size = 2) +
  scale_x_continuous(trans = "log10", expand = c(0, 0)) +
  scale_y_discrete(expand = c(0, 0)) +
  labs(x = "BF [log10]", y = "% Häufigkeit") +
  theme_light() +
  theme(axis.line = element_line(colour = "#696f71"),
        panel.background = element_rect(fill = "#f6f7f7"),
        panel.border = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank())

Plot der deskriptiven Daten


###### DESCRIPTIVE PLOT ############################################### #

# Mean value of sh_not per grade level plus the number of observed
# lessons per grade (length_n, used to size the points below).
# n() replaces the original mutate(length = length(...)) + mean(length)
# detour; %in% replaces the five-branch case_when.
sh_not_p <- p_data %>%
  dplyr::filter(!is.na(sh_not) & !is.na(Klassenstufe)) %>%
  group_by(Klassenstufe) %>%
  summarize(sh_not = mean(sh_not, na.rm = TRUE),
            length_n = n()) %>%
  ungroup() %>%
  mutate(Klassenstufe = factor(Klassenstufe,
                               levels = c("1", "1_2", "2", "3", "4", "5", "6",
                                          "7", "8", "9", "10", "11", "12")),
         # grades 1-4 (incl. the mixed class "1_2") are primary school
         Schulart = factor(if_else(Klassenstufe %in% c("1", "1_2", "2", "3", "4"),
                                   "Grundschule", "Gymnasium")))

ggplot(sh_not_p, aes(x = Klassenstufe, y = sh_not, color = Schulart)) +
  geom_line(aes(group = NA), color = "grey", size = 1) +
  geom_point(aes(size = length_n)) +
  geom_text(aes(label = paste(round(sh_not, 0), "%"), vjust = 3)) +
  scale_y_continuous(limits = c(0, (max(sh_not_p$sh_not) * 1.1)), expand = c(0, 0)) +
  # NOTE: the rects are drawn once per data row; the low alpha (.01)
  # accumulates through overplotting into the visible background tint
  geom_rect(aes(xmin = 0, xmax = 4.5, ymin = -Inf, ymax = Inf),
            fill = "pink", alpha = .01, color = NA) +
  geom_rect(aes(xmin = 4.5, xmax = 14, ymin = -Inf, ymax = Inf),
            fill = "#BFEFFF", alpha = .01, color = NA) +
  xlab("Klassenstufe") +
  ylab("∅ absolute Häufigkeit") +
  labs(size = "Anzahl\neingegangener\nStunden") +
  theme_light() +
  theme(axis.line = element_line(colour = "#696f71"),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        panel.background = element_rect(fill = "#f6f7f7"))

Korrelationen

Bitte mit der Maus über die einzelnen Punkte fahren, um zu erfahren, um welche Korrelation es sich handelt.


# Split the sample by school type and impute each part separately so that
# correlations can be pooled per school type.
p_dataGS <- p_data %>%
  filter(Schulart == "Grundschule")

p_dataGY <- p_data %>%
  filter(Schulart == "Gymnasium")

impGS <- mice(p_dataGS,
              maxit = maxit,
              m = m,
              meth = meth,
              pred = pred,
              seed = 666,
              printFlag = FALSE)

impGY <- mice(p_dataGY,
              maxit = maxit,
              m = m,
              meth = meth,
              pred = pred,
              seed = 666,
              printFlag = FALSE)

# variables whose pairwise correlations are pooled across imputations
# (defined once instead of duplicating the vector in both calls)
cor_vars <- c("lh_ank", "lh_auf", "lh_nen",
              "lh_sch", "lh_erl", "lh_wfr",
              "lh_wno", "lh_ano", "lh_bfr",
              "sh_auf", "sh_mel", "sh_fra",
              "sh_not")

corGS <- miceadds::micombine.cor(impGS, variables = cor_vars)
corGY <- miceadds::micombine.cor(impGY, variables = cor_vars)

# micombine.cor lists every pair twice (A-B and B-A); keep the first half
# (replaces the hard-coded 1:78, which only holds for exactly 13 variables)
corGS <- corGS[seq_len(nrow(corGS) / 2), ] %>%
    mutate(Korrelation = paste(variable1, variable2, sep = "-"),
           rGS = round(r, digits = 3)) %>%
    dplyr::select(Korrelation, rGS)

corGY <- corGY[seq_len(nrow(corGY) / 2), ] %>%
    mutate(Korrelation = paste(variable1, variable2, sep = "-"),
           rGY = round(r, digits = 3)) %>%
    dplyr::select(Korrelation, rGY)

# one row per pair, correlation coefficients of both school types side by side
corGSGY <- full_join(corGS, corGY, by = "Korrelation")


# library(ggrepel)
# ggplot(corGSGY, aes(x=rGS, y=rGY, color = Korrelation, label = Korrelation)) +
#   geom_point() +
#   scale_x_continuous(limits = c(0,1)) +
#   scale_y_continuous(limits = c(0,1)) +
#   geom_abline(intercept = 0, slope = 1) +
#   geom_label_repel() +
#   theme(legend.position = "none")


library(plotly)

# Shared axis settings: both axes span the same correlation range so the
# diagonal is the identity line.
axis_def <- list(range = c(-.4, 1),
                 dtick = 0.25,
                 automargin = TRUE)

fig <- plot_ly()

# Upper-left triangle: points here have a larger correlation in Gymnasium.
fig <- add_trace(fig,
                 x = c(-.4, -.4, 1),
                 y = c(-.4, 1, 1),
                 name = "Korrelation in GY größer",
                 type = 'scatter',
                 fill = 'toself',
                 fillcolor = '#BFEFFF',
                 opacity = .4,
                 hoveron = 'points',
                 marker = list(color = '#BFEFFF', opacity = 0),
                 line = list(color = '#BFEFFF'),
                 text = "",
                 hoverinfo = '')

# Lower-right triangle: points here have a larger correlation in Grundschule.
fig <- add_trace(fig,
                 x = c(-.4, 1, 1),
                 y = c(-.4, -.4, 1),
                 name = "Korrelation in GS größer",
                 type = 'scatter',
                 fill = 'toself',
                 fillcolor = 'pink',
                 opacity = .4,
                 hoveron = 'points',
                 marker = list(color = 'pink', opacity = 0),
                 line = list(color = 'pink'),
                 text = "",
                 hoverinfo = '')

# One marker per variable pair; hovering shows both coefficients.
fig <- add_trace(fig,
                 data = corGSGY,
                 x = ~rGS,
                 y = ~rGY,
                 type = "scatter",
                 mode = "markers",
                 marker = list(color = "darkgrey", opacity = .85),
                 name = "Korrelationskoeffizient in GY und GS",
                 text = ~paste("Korrelation: ", Korrelation,
                               '<br>r<sub>GY</sub>= ', rGY,
                               '<br>r<sub>GS</sub>= ', rGS))

fig <- layout(fig,
              xaxis = axis_def,
              yaxis = axis_def,
              autosize = F,
              width = 800,
              height = 800,
              legend = list(orientation = 'h',
                            x = 0,
                            y = 1))
fig

pander(corGSGY)
Korrelation rGS rGY
lh_ank-lh_auf 0.192 0.178
lh_ank-lh_nen -0.013 0.001
lh_ank-lh_sch 0.111 0.222
lh_ank-lh_erl 0.149 0.185
lh_ank-lh_wfr 0.102 0.114
lh_ank-lh_bfr 0.061 0.008
lh_ank-lh_wno 0.181 0.107
lh_ank-lh_ano 0.133 0.126
lh_ank-sh_auf 0.234 0.244
lh_ank-sh_mel 0.028 0.054
lh_ank-sh_fra 0.052 0.023
lh_ank-sh_not 0.158 0.15
lh_auf-lh_nen 0.113 0.161
lh_auf-lh_sch 0.045 0.111
lh_auf-lh_erl 0.144 0.154
lh_auf-lh_wfr 0.143 0.176
lh_auf-lh_bfr 0.004 0.056
lh_auf-lh_wno 0.12 0.136
lh_auf-lh_ano 0.116 0.17
lh_auf-sh_auf 0.281 0.093
lh_auf-sh_mel -0.031 0.04
lh_auf-sh_fra -0.031 0.045
lh_auf-sh_not 0.077 -0.006
lh_nen-lh_sch 0.097 -0.107
lh_nen-lh_erl 0.136 0.025
lh_nen-lh_wfr 0.031 0.011
lh_nen-lh_bfr 0.031 -0.021
lh_nen-lh_wno 0.057 -0.046
lh_nen-lh_ano 0.056 0.004
lh_nen-sh_auf 0.099 0.152
lh_nen-sh_mel 0.063 -0.033
lh_nen-sh_fra 0.029 -0.035
lh_nen-sh_not -0.012 -0.046
lh_sch-lh_erl 0.113 0.148
lh_sch-lh_wfr 0.088 0.077
lh_sch-lh_bfr -0.073 0.091
lh_sch-lh_wno 0.464 0.377
lh_sch-lh_ano 0.349 0.292
lh_sch-sh_auf 0.105 0.11
lh_sch-sh_mel 0.002 -0.004
lh_sch-sh_fra -0.055 0.074
lh_sch-sh_not 0.544 0.585
lh_erl-lh_wfr 0.21 0.143
lh_erl-lh_bfr 0.183 0.151
lh_erl-lh_wno 0.03 0.235
lh_erl-lh_ano 0.136 0.208
lh_erl-sh_auf 0.307 0.111
lh_erl-sh_mel 0.176 0.098
lh_erl-sh_fra 0.143 0.15
lh_erl-sh_not 0.095 0.143
lh_wfr-lh_bfr 0.155 0.078
lh_wfr-lh_wno 0.139 0.165
lh_wfr-lh_ano 0.15 0.189
lh_wfr-sh_auf 0.085 0.185
lh_wfr-sh_mel 0.324 0.089
lh_wfr-sh_fra 0.137 0.048
lh_wfr-sh_not 0.126 0.052
lh_bfr-lh_wno -0.071 0.133
lh_bfr-lh_ano -0.002 0.132
lh_bfr-sh_auf 0.074 -0.001
lh_bfr-sh_mel 0.578 0.746
lh_bfr-sh_fra 0.937 0.918
lh_bfr-sh_not -0.069 0.1
lh_wno-lh_ano 0.637 0.556
lh_wno-sh_auf 0.07 0.12
lh_wno-sh_mel 0.05 0.008
lh_wno-sh_fra -0.044 0.122
lh_wno-sh_not 0.793 0.504
lh_ano-sh_auf 0.104 0.253
lh_ano-sh_mel 0.093 0.081
lh_ano-sh_fra 0.017 0.133
lh_ano-sh_not 0.702 0.44
sh_auf-sh_mel 0.059 -0.051
sh_auf-sh_fra 0.084 -0.001
sh_auf-sh_not 0.105 0.131
sh_mel-sh_fra 0.565 0.753
sh_mel-sh_not 0.053 0.005
sh_fra-sh_not -0.05 0.087

miceadds::micombine.cor(imp45, variables = c("beg_hw_min", "dau_hw_min"))

   variable1  variable2          r        rse fisher_r fisher_rse
1 beg_hw_min dau_hw_min -0.3256192 0.04471457 -0.33792 0.05000159
2 dau_hw_min beg_hw_min -0.3256192 0.04471457 -0.33792 0.05000159
         fmi         t            p    lower95    upper95
1 0.03852543 -6.758185 1.397312e-11 -0.4102579 -0.2354189
2 0.03852543 -6.758185 1.397312e-11 -0.4102579 -0.2354189

miceadds::micombine.cor(imp90, variables = c("beg_hw_min", "dau_hw_min"))

   variable1  variable2         r        rse   fisher_r fisher_rse
1 beg_hw_min dau_hw_min -0.484616 0.09540839 -0.5289995  0.1246324
2 dau_hw_min beg_hw_min -0.484616 0.09540839 -0.5289995  0.1246324
          fmi         t            p    lower95   upper95
1 0.009568712 -4.244477 2.191038e-05 -0.6488296 -0.277272
2 0.009568712 -4.244477 2.191038e-05 -0.6488296 -0.277272

# p_data_dm <- p_data %>%
#   dplyr::select(lh_ank, lh_auf, lh_sch, lh_erl, lh_wfr, lh_wno, lh_ano)
# dicho_r_m <- data.frame()
# 
# # Correlation matrix & visualization
# for(i in 1:7) {
#   for(j in 1:7){
#     # print(c(i,j))
#     tmp <- matrix(table(p_data_dm[,i], p_data_dm[,j]), 2, 2)
#     dicho_r_m[j,i] <- Yule(tmp)                                   #Alternative phi
#     dicho_r_m[i,j] <- Yule(tmp)                                   #Alternative phi
#   }
# }
# 
# names(dicho_r_m) <- c("lh_ank", "lh_auf", "lh_sch", "lh_erl", "lh_wfr", "lh_wno", "lh_ano")
# row.names(dicho_r_m) <- c("lh_ank", "lh_auf", "lh_sch", "lh_erl", "lh_wfr", "lh_wno", "lh_ano")
# # corrgram(dicho_r_m, type = "cor", lower.panel = "panel.pie", upper.panel = "panel.cor")
# 
# # GS vs. GYM
# 
# dicho_r_gs <- data.frame(cor_gs = NA, comb = NA)
# dicho_r_gs <- dicho_r_gs[-1,]
# 
# p_data_dm_gs <- p_data %>%
#   dplyr::filter(Schulart == "Grundschule") %>%
#   dplyr::select(lh_ank, lh_auf, lh_sch, lh_erl, lh_wfr, lh_wno, lh_ano)
# 
# for(i in 1:6) {
#   for(j in (1+i):7){
#     tmp <- matrix(table(p_data_dm_gs[,i], p_data_dm_gs[,j]), 2, 2)
#     tmp1 <-  data.frame()
#     tmp1[1, "cor_gs"] <- Yule(tmp)
#     tmp1[1, "comb"] <- paste(names(p_data_dm_gs[i]), "-", names(p_data_dm_gs[j]))
#     dicho_r_gs <- rbind(dicho_r_gs, tmp1)
#   }
# }
# 
# 
# 
# 
# dicho_r_gy <- data.frame(cor_gy = NA, comb = NA)
# dicho_r_gy <- dicho_r_gy[-1,]
# 
# p_data_dm_gy <- p_data %>%
#   dplyr::filter(Schulart == "Gymnasium") %>%
#   dplyr::select(lh_ank, lh_auf, lh_sch, lh_erl, lh_wfr, lh_wno, lh_ano)
# 
# for(i in 1:6) {
#   for(j in (1+i):7){
#     tmp <- matrix(table(p_data_dm_gy[,i], p_data_dm_gy[,j]), 2, 2)
#     tmp1 <-  data.frame()
#     tmp1[1, "cor_gy"] <- Yule(tmp)
#     tmp1[1, "comb"] <- paste(names(p_data_dm_gy[i]), "-", names(p_data_dm_gy[j]))
#     dicho_r_gy <- rbind(dicho_r_gy, tmp1)
#   }
# }
# 
# dicho_r <- full_join(dicho_r_gs, dicho_r_gy, by="comb")
# 
# 
# 
# ggplot(dicho_r, aes(x=cor_gs, y=cor_gy, color = comb, label = comb)) +
#   geom_point() +
#   scale_x_continuous(limits = c(0,1)) +
#   scale_y_continuous(limits = c(0,1)) +
#   geom_abline(intercept = 0, slope = 1) +
#   geom_label_repel() +
#   theme(legend.position = "none")